All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace
@ 2018-06-05  4:36 Qu Wenruo
  2018-06-05  5:34 ` Qu Wenruo
  2018-06-07  1:01 ` james harvey
  0 siblings, 2 replies; 12+ messages in thread
From: Qu Wenruo @ 2018-06-05  4:36 UTC (permalink / raw)
  To: linux-btrfs

[BUG]
Btrfs can easily create compressed extent without checksum (even
though it shouldn't), and if we then try to replace device containing
such extent, the result device will contain all the uncompressed data
instead of the compressed one.

Test case already submitted to fstests:
https://patchwork.kernel.org/patch/10442353/

[CAUSE]
When handling compressed extent without checksum, device replace will
goes into copy_nocow_pages() function.

In that function, btrfs will get all inodes referring to this data
extents and then use find_or_create_page() to get pages direct from that
inode.

The problem here is, pages directly from inode are always uncompressed.
And for compressed data extent, they mismatch with on-disk data.
Thus this leads to corrupted compressed data extent written to replace
device.

[FIX]
In this attempt, we could just remove the "optimization" branch, and let
unified scrub_pages() to handle it.

Although scrub_pages() won't bother reusing page cache, it will be a
little slower, but it does the correct csum checking and won't cause
such data corruption cause by "optimization".

Reported-by: James Harvey <jamespharvey20@gmail.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
Changelog:
ver.B:
    Instead of checking extent type in copy_nocow_pages() branch, just
    remove them all, since existing scrub_pages() can handle nodatasum
    case pretty well and it saves a lot of codes.
---
 fs/btrfs/scrub.c | 341 -----------------------------------------------
 1 file changed, 341 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 52b39a0924e9..799e4c6b2551 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -277,13 +277,6 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
 static void scrub_wr_submit(struct scrub_ctx *sctx);
 static void scrub_wr_bio_end_io(struct bio *bio);
 static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
-static int write_page_nocow(struct scrub_ctx *sctx,
-			    u64 physical_for_dev_replace, struct page *page);
-static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
-				      struct scrub_copy_nocow_ctx *ctx);
-static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
-			    int mirror_num, u64 physical_for_dev_replace);
-static void copy_nocow_pages_worker(struct btrfs_work *work);
 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 static void scrub_put_ctx(struct scrub_ctx *sctx);
@@ -2799,17 +2792,10 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
 			have_csum = scrub_find_csum(sctx, logical, csum);
 			if (have_csum == 0)
 				++sctx->stat.no_csum;
-			if (sctx->is_dev_replace && !have_csum) {
-				ret = copy_nocow_pages(sctx, logical, l,
-						       mirror_num,
-						      physical_for_dev_replace);
-				goto behind_scrub_pages;
-			}
 		}
 		ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
 				  mirror_num, have_csum ? csum : NULL, 0,
 				  physical_for_dev_replace);
-behind_scrub_pages:
 		if (ret)
 			return ret;
 		len -= l;
@@ -4357,330 +4343,3 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
 	*extent_dev = bbio->stripes[0].dev;
 	btrfs_put_bbio(bbio);
 }
-
-static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
-			    int mirror_num, u64 physical_for_dev_replace)
-{
-	struct scrub_copy_nocow_ctx *nocow_ctx;
-	struct btrfs_fs_info *fs_info = sctx->fs_info;
-
-	nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
-	if (!nocow_ctx) {
-		spin_lock(&sctx->stat_lock);
-		sctx->stat.malloc_errors++;
-		spin_unlock(&sctx->stat_lock);
-		return -ENOMEM;
-	}
-
-	scrub_pending_trans_workers_inc(sctx);
-
-	nocow_ctx->sctx = sctx;
-	nocow_ctx->logical = logical;
-	nocow_ctx->len = len;
-	nocow_ctx->mirror_num = mirror_num;
-	nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
-	btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
-			copy_nocow_pages_worker, NULL, NULL);
-	INIT_LIST_HEAD(&nocow_ctx->inodes);
-	btrfs_queue_work(fs_info->scrub_nocow_workers,
-			 &nocow_ctx->work);
-
-	return 0;
-}
-
-static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
-{
-	struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
-	struct scrub_nocow_inode *nocow_inode;
-
-	nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
-	if (!nocow_inode)
-		return -ENOMEM;
-	nocow_inode->inum = inum;
-	nocow_inode->offset = offset;
-	nocow_inode->root = root;
-	list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
-	return 0;
-}
-
-#define COPY_COMPLETE 1
-
-static void copy_nocow_pages_worker(struct btrfs_work *work)
-{
-	struct scrub_copy_nocow_ctx *nocow_ctx =
-		container_of(work, struct scrub_copy_nocow_ctx, work);
-	struct scrub_ctx *sctx = nocow_ctx->sctx;
-	struct btrfs_fs_info *fs_info = sctx->fs_info;
-	struct btrfs_root *root = fs_info->extent_root;
-	u64 logical = nocow_ctx->logical;
-	u64 len = nocow_ctx->len;
-	int mirror_num = nocow_ctx->mirror_num;
-	u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
-	int ret;
-	struct btrfs_trans_handle *trans = NULL;
-	struct btrfs_path *path;
-	int not_written = 0;
-
-	path = btrfs_alloc_path();
-	if (!path) {
-		spin_lock(&sctx->stat_lock);
-		sctx->stat.malloc_errors++;
-		spin_unlock(&sctx->stat_lock);
-		not_written = 1;
-		goto out;
-	}
-
-	trans = btrfs_join_transaction(root);
-	if (IS_ERR(trans)) {
-		not_written = 1;
-		goto out;
-	}
-
-	ret = iterate_inodes_from_logical(logical, fs_info, path,
-			record_inode_for_nocow, nocow_ctx, false);
-	if (ret != 0 && ret != -ENOENT) {
-		btrfs_warn(fs_info,
-			   "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
-			   logical, physical_for_dev_replace, len, mirror_num,
-			   ret);
-		not_written = 1;
-		goto out;
-	}
-
-	btrfs_end_transaction(trans);
-	trans = NULL;
-	while (!list_empty(&nocow_ctx->inodes)) {
-		struct scrub_nocow_inode *entry;
-		entry = list_first_entry(&nocow_ctx->inodes,
-					 struct scrub_nocow_inode,
-					 list);
-		list_del_init(&entry->list);
-		ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
-						 entry->root, nocow_ctx);
-		kfree(entry);
-		if (ret == COPY_COMPLETE) {
-			ret = 0;
-			break;
-		} else if (ret) {
-			break;
-		}
-	}
-out:
-	while (!list_empty(&nocow_ctx->inodes)) {
-		struct scrub_nocow_inode *entry;
-		entry = list_first_entry(&nocow_ctx->inodes,
-					 struct scrub_nocow_inode,
-					 list);
-		list_del_init(&entry->list);
-		kfree(entry);
-	}
-	if (trans && !IS_ERR(trans))
-		btrfs_end_transaction(trans);
-	if (not_written)
-		btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
-					    num_uncorrectable_read_errors);
-
-	btrfs_free_path(path);
-	kfree(nocow_ctx);
-
-	scrub_pending_trans_workers_dec(sctx);
-}
-
-static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
-				 u64 logical)
-{
-	struct extent_state *cached_state = NULL;
-	struct btrfs_ordered_extent *ordered;
-	struct extent_io_tree *io_tree;
-	struct extent_map *em;
-	u64 lockstart = start, lockend = start + len - 1;
-	int ret = 0;
-
-	io_tree = &inode->io_tree;
-
-	lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
-	ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
-	if (ordered) {
-		btrfs_put_ordered_extent(ordered);
-		ret = 1;
-		goto out_unlock;
-	}
-
-	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-	if (IS_ERR(em)) {
-		ret = PTR_ERR(em);
-		goto out_unlock;
-	}
-
-	/*
-	 * This extent does not actually cover the logical extent anymore,
-	 * move on to the next inode.
-	 */
-	if (em->block_start > logical ||
-	    em->block_start + em->block_len < logical + len ||
-	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
-		free_extent_map(em);
-		ret = 1;
-		goto out_unlock;
-	}
-	free_extent_map(em);
-
-out_unlock:
-	unlock_extent_cached(io_tree, lockstart, lockend, &cached_state);
-	return ret;
-}
-
-static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
-				      struct scrub_copy_nocow_ctx *nocow_ctx)
-{
-	struct btrfs_fs_info *fs_info = nocow_ctx->sctx->fs_info;
-	struct btrfs_key key;
-	struct inode *inode;
-	struct page *page;
-	struct btrfs_root *local_root;
-	struct extent_io_tree *io_tree;
-	u64 physical_for_dev_replace;
-	u64 nocow_ctx_logical;
-	u64 len = nocow_ctx->len;
-	unsigned long index;
-	int srcu_index;
-	int ret = 0;
-	int err = 0;
-
-	key.objectid = root;
-	key.type = BTRFS_ROOT_ITEM_KEY;
-	key.offset = (u64)-1;
-
-	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
-
-	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
-	if (IS_ERR(local_root)) {
-		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
-		return PTR_ERR(local_root);
-	}
-
-	key.type = BTRFS_INODE_ITEM_KEY;
-	key.objectid = inum;
-	key.offset = 0;
-	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
-	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
-	if (IS_ERR(inode))
-		return PTR_ERR(inode);
-
-	/* Avoid truncate/dio/punch hole.. */
-	inode_lock(inode);
-	inode_dio_wait(inode);
-
-	physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
-	io_tree = &BTRFS_I(inode)->io_tree;
-	nocow_ctx_logical = nocow_ctx->logical;
-
-	ret = check_extent_to_block(BTRFS_I(inode), offset, len,
-			nocow_ctx_logical);
-	if (ret) {
-		ret = ret > 0 ? 0 : ret;
-		goto out;
-	}
-
-	while (len >= PAGE_SIZE) {
-		index = offset >> PAGE_SHIFT;
-again:
-		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
-		if (!page) {
-			btrfs_err(fs_info, "find_or_create_page() failed");
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		if (PageUptodate(page)) {
-			if (PageDirty(page))
-				goto next_page;
-		} else {
-			ClearPageError(page);
-			err = extent_read_full_page(io_tree, page,
-							   btrfs_get_extent,
-							   nocow_ctx->mirror_num);
-			if (err) {
-				ret = err;
-				goto next_page;
-			}
-
-			lock_page(page);
-			/*
-			 * If the page has been remove from the page cache,
-			 * the data on it is meaningless, because it may be
-			 * old one, the new data may be written into the new
-			 * page in the page cache.
-			 */
-			if (page->mapping != inode->i_mapping) {
-				unlock_page(page);
-				put_page(page);
-				goto again;
-			}
-			if (!PageUptodate(page)) {
-				ret = -EIO;
-				goto next_page;
-			}
-		}
-
-		ret = check_extent_to_block(BTRFS_I(inode), offset, len,
-					    nocow_ctx_logical);
-		if (ret) {
-			ret = ret > 0 ? 0 : ret;
-			goto next_page;
-		}
-
-		err = write_page_nocow(nocow_ctx->sctx,
-				       physical_for_dev_replace, page);
-		if (err)
-			ret = err;
-next_page:
-		unlock_page(page);
-		put_page(page);
-
-		if (ret)
-			break;
-
-		offset += PAGE_SIZE;
-		physical_for_dev_replace += PAGE_SIZE;
-		nocow_ctx_logical += PAGE_SIZE;
-		len -= PAGE_SIZE;
-	}
-	ret = COPY_COMPLETE;
-out:
-	inode_unlock(inode);
-	iput(inode);
-	return ret;
-}
-
-static int write_page_nocow(struct scrub_ctx *sctx,
-			    u64 physical_for_dev_replace, struct page *page)
-{
-	struct bio *bio;
-	struct btrfs_device *dev;
-
-	dev = sctx->wr_tgtdev;
-	if (!dev)
-		return -EIO;
-	if (!dev->bdev) {
-		btrfs_warn_rl(dev->fs_info,
-			"scrub write_page_nocow(bdev == NULL) is unexpected");
-		return -EIO;
-	}
-	bio = btrfs_io_bio_alloc(1);
-	bio->bi_iter.bi_size = 0;
-	bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
-	bio_set_dev(bio, dev->bdev);
-	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
-	/* bio_add_page won't fail on a freshly allocated bio */
-	bio_add_page(bio, page, PAGE_SIZE, 0);
-
-	if (btrfsic_submit_bio_wait(bio)) {
-		bio_put(bio);
-		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
-		return -EIO;
-	}
-
-	bio_put(bio);
-	return 0;
-}
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace
  2018-06-05  4:36 [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace Qu Wenruo
@ 2018-06-05  5:34 ` Qu Wenruo
  2018-06-05 13:42   ` David Sterba
  2018-06-07  1:01 ` james harvey
  1 sibling, 1 reply; 12+ messages in thread
From: Qu Wenruo @ 2018-06-05  5:34 UTC (permalink / raw)
  To: Qu Wenruo, linux-btrfs, David Sterba


[-- Attachment #1.1: Type: text/plain, Size: 13250 bytes --]

Hi David,

It would be pretty nice if we could get this fix (or previous RFC patch)
to get into current release cycle.

As it's a unrecoverable data corruption, it would be better to get it
fixed as soon as possible.

Thanks,
Qu

On 2018年06月05日 12:36, Qu Wenruo wrote:
> [BUG]
> Btrfs can easily create compressed extent without checksum (even
> though it shouldn't), and if we then try to replace device containing
> such extent, the result device will contain all the uncompressed data
> instead of the compressed one.
> 
> Test case already submitted to fstests:
> https://patchwork.kernel.org/patch/10442353/
> 
> [CAUSE]
> When handling compressed extent without checksum, device replace will
> goes into copy_nocow_pages() function.
> 
> In that function, btrfs will get all inodes referring to this data
> extents and then use find_or_create_page() to get pages direct from that
> inode.
> 
> The problem here is, pages directly from inode are always uncompressed.
> And for compressed data extent, they mismatch with on-disk data.
> Thus this leads to corrupted compressed data extent written to replace
> device.
> 
> [FIX]
> In this attempt, we could just remove the "optimization" branch, and let
> unified scrub_pages() to handle it.
> 
> Although scrub_pages() won't bother reusing page cache, it will be a
> little slower, but it does the correct csum checking and won't cause
> such data corruption cause by "optimization".
> 
> Reported-by: James Harvey <jamespharvey20@gmail.com>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> Changelog:
> ver.B:
>     Instead of checking extent type in copy_nocow_pages() branch, just
>     remove them all, since existing scrub_pages() can handle nodatasum
>     case pretty well and it saves a lot of codes.
> ---
>  fs/btrfs/scrub.c | 341 -----------------------------------------------
>  1 file changed, 341 deletions(-)
> 
> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> index 52b39a0924e9..799e4c6b2551 100644
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -277,13 +277,6 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
>  static void scrub_wr_submit(struct scrub_ctx *sctx);
>  static void scrub_wr_bio_end_io(struct bio *bio);
>  static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
> -static int write_page_nocow(struct scrub_ctx *sctx,
> -			    u64 physical_for_dev_replace, struct page *page);
> -static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
> -				      struct scrub_copy_nocow_ctx *ctx);
> -static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
> -			    int mirror_num, u64 physical_for_dev_replace);
> -static void copy_nocow_pages_worker(struct btrfs_work *work);
>  static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
>  static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
>  static void scrub_put_ctx(struct scrub_ctx *sctx);
> @@ -2799,17 +2792,10 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
>  			have_csum = scrub_find_csum(sctx, logical, csum);
>  			if (have_csum == 0)
>  				++sctx->stat.no_csum;
> -			if (sctx->is_dev_replace && !have_csum) {
> -				ret = copy_nocow_pages(sctx, logical, l,
> -						       mirror_num,
> -						      physical_for_dev_replace);
> -				goto behind_scrub_pages;
> -			}
>  		}
>  		ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
>  				  mirror_num, have_csum ? csum : NULL, 0,
>  				  physical_for_dev_replace);
> -behind_scrub_pages:
>  		if (ret)
>  			return ret;
>  		len -= l;
> @@ -4357,330 +4343,3 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
>  	*extent_dev = bbio->stripes[0].dev;
>  	btrfs_put_bbio(bbio);
>  }
> -
> -static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
> -			    int mirror_num, u64 physical_for_dev_replace)
> -{
> -	struct scrub_copy_nocow_ctx *nocow_ctx;
> -	struct btrfs_fs_info *fs_info = sctx->fs_info;
> -
> -	nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
> -	if (!nocow_ctx) {
> -		spin_lock(&sctx->stat_lock);
> -		sctx->stat.malloc_errors++;
> -		spin_unlock(&sctx->stat_lock);
> -		return -ENOMEM;
> -	}
> -
> -	scrub_pending_trans_workers_inc(sctx);
> -
> -	nocow_ctx->sctx = sctx;
> -	nocow_ctx->logical = logical;
> -	nocow_ctx->len = len;
> -	nocow_ctx->mirror_num = mirror_num;
> -	nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
> -	btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
> -			copy_nocow_pages_worker, NULL, NULL);
> -	INIT_LIST_HEAD(&nocow_ctx->inodes);
> -	btrfs_queue_work(fs_info->scrub_nocow_workers,
> -			 &nocow_ctx->work);
> -
> -	return 0;
> -}
> -
> -static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
> -{
> -	struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
> -	struct scrub_nocow_inode *nocow_inode;
> -
> -	nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
> -	if (!nocow_inode)
> -		return -ENOMEM;
> -	nocow_inode->inum = inum;
> -	nocow_inode->offset = offset;
> -	nocow_inode->root = root;
> -	list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
> -	return 0;
> -}
> -
> -#define COPY_COMPLETE 1
> -
> -static void copy_nocow_pages_worker(struct btrfs_work *work)
> -{
> -	struct scrub_copy_nocow_ctx *nocow_ctx =
> -		container_of(work, struct scrub_copy_nocow_ctx, work);
> -	struct scrub_ctx *sctx = nocow_ctx->sctx;
> -	struct btrfs_fs_info *fs_info = sctx->fs_info;
> -	struct btrfs_root *root = fs_info->extent_root;
> -	u64 logical = nocow_ctx->logical;
> -	u64 len = nocow_ctx->len;
> -	int mirror_num = nocow_ctx->mirror_num;
> -	u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
> -	int ret;
> -	struct btrfs_trans_handle *trans = NULL;
> -	struct btrfs_path *path;
> -	int not_written = 0;
> -
> -	path = btrfs_alloc_path();
> -	if (!path) {
> -		spin_lock(&sctx->stat_lock);
> -		sctx->stat.malloc_errors++;
> -		spin_unlock(&sctx->stat_lock);
> -		not_written = 1;
> -		goto out;
> -	}
> -
> -	trans = btrfs_join_transaction(root);
> -	if (IS_ERR(trans)) {
> -		not_written = 1;
> -		goto out;
> -	}
> -
> -	ret = iterate_inodes_from_logical(logical, fs_info, path,
> -			record_inode_for_nocow, nocow_ctx, false);
> -	if (ret != 0 && ret != -ENOENT) {
> -		btrfs_warn(fs_info,
> -			   "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
> -			   logical, physical_for_dev_replace, len, mirror_num,
> -			   ret);
> -		not_written = 1;
> -		goto out;
> -	}
> -
> -	btrfs_end_transaction(trans);
> -	trans = NULL;
> -	while (!list_empty(&nocow_ctx->inodes)) {
> -		struct scrub_nocow_inode *entry;
> -		entry = list_first_entry(&nocow_ctx->inodes,
> -					 struct scrub_nocow_inode,
> -					 list);
> -		list_del_init(&entry->list);
> -		ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
> -						 entry->root, nocow_ctx);
> -		kfree(entry);
> -		if (ret == COPY_COMPLETE) {
> -			ret = 0;
> -			break;
> -		} else if (ret) {
> -			break;
> -		}
> -	}
> -out:
> -	while (!list_empty(&nocow_ctx->inodes)) {
> -		struct scrub_nocow_inode *entry;
> -		entry = list_first_entry(&nocow_ctx->inodes,
> -					 struct scrub_nocow_inode,
> -					 list);
> -		list_del_init(&entry->list);
> -		kfree(entry);
> -	}
> -	if (trans && !IS_ERR(trans))
> -		btrfs_end_transaction(trans);
> -	if (not_written)
> -		btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
> -					    num_uncorrectable_read_errors);
> -
> -	btrfs_free_path(path);
> -	kfree(nocow_ctx);
> -
> -	scrub_pending_trans_workers_dec(sctx);
> -}
> -
> -static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
> -				 u64 logical)
> -{
> -	struct extent_state *cached_state = NULL;
> -	struct btrfs_ordered_extent *ordered;
> -	struct extent_io_tree *io_tree;
> -	struct extent_map *em;
> -	u64 lockstart = start, lockend = start + len - 1;
> -	int ret = 0;
> -
> -	io_tree = &inode->io_tree;
> -
> -	lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
> -	ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
> -	if (ordered) {
> -		btrfs_put_ordered_extent(ordered);
> -		ret = 1;
> -		goto out_unlock;
> -	}
> -
> -	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
> -	if (IS_ERR(em)) {
> -		ret = PTR_ERR(em);
> -		goto out_unlock;
> -	}
> -
> -	/*
> -	 * This extent does not actually cover the logical extent anymore,
> -	 * move on to the next inode.
> -	 */
> -	if (em->block_start > logical ||
> -	    em->block_start + em->block_len < logical + len ||
> -	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
> -		free_extent_map(em);
> -		ret = 1;
> -		goto out_unlock;
> -	}
> -	free_extent_map(em);
> -
> -out_unlock:
> -	unlock_extent_cached(io_tree, lockstart, lockend, &cached_state);
> -	return ret;
> -}
> -
> -static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
> -				      struct scrub_copy_nocow_ctx *nocow_ctx)
> -{
> -	struct btrfs_fs_info *fs_info = nocow_ctx->sctx->fs_info;
> -	struct btrfs_key key;
> -	struct inode *inode;
> -	struct page *page;
> -	struct btrfs_root *local_root;
> -	struct extent_io_tree *io_tree;
> -	u64 physical_for_dev_replace;
> -	u64 nocow_ctx_logical;
> -	u64 len = nocow_ctx->len;
> -	unsigned long index;
> -	int srcu_index;
> -	int ret = 0;
> -	int err = 0;
> -
> -	key.objectid = root;
> -	key.type = BTRFS_ROOT_ITEM_KEY;
> -	key.offset = (u64)-1;
> -
> -	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
> -
> -	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
> -	if (IS_ERR(local_root)) {
> -		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
> -		return PTR_ERR(local_root);
> -	}
> -
> -	key.type = BTRFS_INODE_ITEM_KEY;
> -	key.objectid = inum;
> -	key.offset = 0;
> -	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
> -	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
> -	if (IS_ERR(inode))
> -		return PTR_ERR(inode);
> -
> -	/* Avoid truncate/dio/punch hole.. */
> -	inode_lock(inode);
> -	inode_dio_wait(inode);
> -
> -	physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
> -	io_tree = &BTRFS_I(inode)->io_tree;
> -	nocow_ctx_logical = nocow_ctx->logical;
> -
> -	ret = check_extent_to_block(BTRFS_I(inode), offset, len,
> -			nocow_ctx_logical);
> -	if (ret) {
> -		ret = ret > 0 ? 0 : ret;
> -		goto out;
> -	}
> -
> -	while (len >= PAGE_SIZE) {
> -		index = offset >> PAGE_SHIFT;
> -again:
> -		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
> -		if (!page) {
> -			btrfs_err(fs_info, "find_or_create_page() failed");
> -			ret = -ENOMEM;
> -			goto out;
> -		}
> -
> -		if (PageUptodate(page)) {
> -			if (PageDirty(page))
> -				goto next_page;
> -		} else {
> -			ClearPageError(page);
> -			err = extent_read_full_page(io_tree, page,
> -							   btrfs_get_extent,
> -							   nocow_ctx->mirror_num);
> -			if (err) {
> -				ret = err;
> -				goto next_page;
> -			}
> -
> -			lock_page(page);
> -			/*
> -			 * If the page has been remove from the page cache,
> -			 * the data on it is meaningless, because it may be
> -			 * old one, the new data may be written into the new
> -			 * page in the page cache.
> -			 */
> -			if (page->mapping != inode->i_mapping) {
> -				unlock_page(page);
> -				put_page(page);
> -				goto again;
> -			}
> -			if (!PageUptodate(page)) {
> -				ret = -EIO;
> -				goto next_page;
> -			}
> -		}
> -
> -		ret = check_extent_to_block(BTRFS_I(inode), offset, len,
> -					    nocow_ctx_logical);
> -		if (ret) {
> -			ret = ret > 0 ? 0 : ret;
> -			goto next_page;
> -		}
> -
> -		err = write_page_nocow(nocow_ctx->sctx,
> -				       physical_for_dev_replace, page);
> -		if (err)
> -			ret = err;
> -next_page:
> -		unlock_page(page);
> -		put_page(page);
> -
> -		if (ret)
> -			break;
> -
> -		offset += PAGE_SIZE;
> -		physical_for_dev_replace += PAGE_SIZE;
> -		nocow_ctx_logical += PAGE_SIZE;
> -		len -= PAGE_SIZE;
> -	}
> -	ret = COPY_COMPLETE;
> -out:
> -	inode_unlock(inode);
> -	iput(inode);
> -	return ret;
> -}
> -
> -static int write_page_nocow(struct scrub_ctx *sctx,
> -			    u64 physical_for_dev_replace, struct page *page)
> -{
> -	struct bio *bio;
> -	struct btrfs_device *dev;
> -
> -	dev = sctx->wr_tgtdev;
> -	if (!dev)
> -		return -EIO;
> -	if (!dev->bdev) {
> -		btrfs_warn_rl(dev->fs_info,
> -			"scrub write_page_nocow(bdev == NULL) is unexpected");
> -		return -EIO;
> -	}
> -	bio = btrfs_io_bio_alloc(1);
> -	bio->bi_iter.bi_size = 0;
> -	bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
> -	bio_set_dev(bio, dev->bdev);
> -	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
> -	/* bio_add_page won't fail on a freshly allocated bio */
> -	bio_add_page(bio, page, PAGE_SIZE, 0);
> -
> -	if (btrfsic_submit_bio_wait(bio)) {
> -		bio_put(bio);
> -		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
> -		return -EIO;
> -	}
> -
> -	bio_put(bio);
> -	return 0;
> -}
> 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace
  2018-06-05  5:34 ` Qu Wenruo
@ 2018-06-05 13:42   ` David Sterba
  2018-06-05 13:47     ` Qu Wenruo
  0 siblings, 1 reply; 12+ messages in thread
From: David Sterba @ 2018-06-05 13:42 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: Qu Wenruo, linux-btrfs, David Sterba

On Tue, Jun 05, 2018 at 01:34:03PM +0800, Qu Wenruo wrote:
> Hi David,
> 
> It would be pretty nice if we could get this fix (or previous RFC patch)
> to get into current release cycle.
> 
> As it's a unrecoverable data corruption, it would be better to get it
> fixed as soon as possible.

That we can do, I'm planning to send 2nd pull by the end of the next
week as there's at least one patch in the queue now.

This patch seems to big, can you please prepare a minimal version?
Thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace
  2018-06-05 13:42   ` David Sterba
@ 2018-06-05 13:47     ` Qu Wenruo
  2018-06-05 14:07       ` David Sterba
  0 siblings, 1 reply; 12+ messages in thread
From: Qu Wenruo @ 2018-06-05 13:47 UTC (permalink / raw)
  To: dsterba, Qu Wenruo, Qu Wenruo, linux-btrfs


[-- Attachment #1.1: Type: text/plain, Size: 807 bytes --]



On 2018年06月05日 21:42, David Sterba wrote:
> On Tue, Jun 05, 2018 at 01:34:03PM +0800, Qu Wenruo wrote:
>> Hi David,
>>
>> It would be pretty nice if we could get this fix (or previous RFC patch)
>> to get into current release cycle.
>>
>> As it's a unrecoverable data corruption, it would be better to get it
>> fixed as soon as possible.
> 
> That we can do, I'm planning to send 2nd pull by the end of the next
> week as there's at least one patch in the queue now.
> 
> This patch seems to big, can you please prepare a minimal version?

The previous version (a completely different direction though) is much
smaller.
https://patchwork.kernel.org/patch/10440541/

However personally speaking, I still prefer this one, as it's much simpler.

Thanks,
Qu

> Thanks.
> 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace
  2018-06-05 13:47     ` Qu Wenruo
@ 2018-06-05 14:07       ` David Sterba
  2018-06-05 14:14         ` Qu Wenruo
  0 siblings, 1 reply; 12+ messages in thread
From: David Sterba @ 2018-06-05 14:07 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: dsterba, Qu Wenruo, Qu Wenruo, linux-btrfs

On Tue, Jun 05, 2018 at 09:47:46PM +0800, Qu Wenruo wrote:
> 
> 
> On 2018年06月05日 21:42, David Sterba wrote:
> > On Tue, Jun 05, 2018 at 01:34:03PM +0800, Qu Wenruo wrote:
> >> Hi David,
> >>
> >> It would be pretty nice if we could get this fix (or previous RFC patch)
> >> to get into current release cycle.
> >>
> >> As it's a unrecoverable data corruption, it would be better to get it
> >> fixed as soon as possible.
> > 
> > That we can do, I'm planning to send 2nd pull by the end of the next
> > week as there's at least one patch in the queue now.
> > 
> > This patch seems to big, can you please prepare a minimal version?
> 
> The previous version (a completely different direction though) is much
> smaller.
> https://patchwork.kernel.org/patch/10440541/
> 
> However personally speaking, I still prefer this one, as it's much simpler.

As this will go to older stable kernels, I'd rather split that to more
patches where the first one is

--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2799,7 +2799,7 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
                        have_csum = scrub_find_csum(sctx, logical, csum);
                        if (have_csum == 0)
                                ++sctx->stat.no_csum;
-                       if (sctx->is_dev_replace && !have_csum) {
+                       if (0 && sctx->is_dev_replace && !have_csum) {
                                ret = copy_nocow_pages(sctx, logical, l,
                                                       mirror_num,
                                                      physical_for_dev_replace);
---

and then the whole callchain of copy_nocow_pages continues.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace
  2018-06-05 14:07       ` David Sterba
@ 2018-06-05 14:14         ` Qu Wenruo
  2018-06-05 14:24           ` David Sterba
  2018-06-09  0:33           ` David Sterba
  0 siblings, 2 replies; 12+ messages in thread
From: Qu Wenruo @ 2018-06-05 14:14 UTC (permalink / raw)
  To: dsterba, Qu Wenruo, Qu Wenruo, linux-btrfs


[-- Attachment #1.1: Type: text/plain, Size: 2129 bytes --]



On 2018年06月05日 22:07, David Sterba wrote:
> On Tue, Jun 05, 2018 at 09:47:46PM +0800, Qu Wenruo wrote:
>>
>>
>> On 2018年06月05日 21:42, David Sterba wrote:
>>> On Tue, Jun 05, 2018 at 01:34:03PM +0800, Qu Wenruo wrote:
>>>> Hi David,
>>>>
>>>> It would be pretty nice if we could get this fix (or previous RFC patch)
>>>> to get into current release cycle.
>>>>
>>>> As it's a unrecoverable data corruption, it would be better to get it
>>>> fixed as soon as possible.
>>>
>>> That we can do, I'm planning to send 2nd pull by the end of the next
>>> week as there's at least one patch in the queue now.
>>>
>>> This patch seems to big, can you please prepare a minimal version?
>>
>> The previous version (a completely different direction though) is much
>> smaller.
>> https://patchwork.kernel.org/patch/10440541/
>>
>> However personally speaking, I still prefer this one, as it's much simpler.
> 
> As this will go to older stable kernels, I'd rather split that to more
> patches where the first one is
> 
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -2799,7 +2799,7 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
>                         have_csum = scrub_find_csum(sctx, logical, csum);
>                         if (have_csum == 0)
>                                 ++sctx->stat.no_csum;
> -                       if (sctx->is_dev_replace && !have_csum) {
> +                       if (0 && sctx->is_dev_replace && !have_csum) {
>                                 ret = copy_nocow_pages(sctx, logical, l,
>                                                        mirror_num,
>                                                       physical_for_dev_replace);
> ---
> 
> and then the whole callchain of copy_nocow_pages continues.

Understood.
I could go this method.

However I'm a little concerned about such "if (0 &&" usage.

Although with gcc 8.1 it works without any warning, it still looks
pretty strange, as compiler could one day detect such dead branch and
find copy_nocow_pages() is never used.

Thanks,
Qu


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace
  2018-06-05 14:14         ` Qu Wenruo
@ 2018-06-05 14:24           ` David Sterba
  2018-06-05 14:30             ` Nikolay Borisov
  2018-06-09  0:33           ` David Sterba
  1 sibling, 1 reply; 12+ messages in thread
From: David Sterba @ 2018-06-05 14:24 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: dsterba, Qu Wenruo, Qu Wenruo, linux-btrfs

On Tue, Jun 05, 2018 at 10:14:51PM +0800, Qu Wenruo wrote:
> >> The previous version (a completely different direction though) is much
> >> smaller.
> >> https://patchwork.kernel.org/patch/10440541/
> >>
> >> However personally speaking, I still prefer this one, as it's much simpler.
> > 
> > As this will go to older stable kernels, I'd rather split that to more
> > patches where the first one is
> > 
> > --- a/fs/btrfs/scrub.c
> > +++ b/fs/btrfs/scrub.c
> > @@ -2799,7 +2799,7 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
> >                         have_csum = scrub_find_csum(sctx, logical, csum);
> >                         if (have_csum == 0)
> >                                 ++sctx->stat.no_csum;
> > -                       if (sctx->is_dev_replace && !have_csum) {
> > +                       if (0 && sctx->is_dev_replace && !have_csum) {
> >                                 ret = copy_nocow_pages(sctx, logical, l,
> >                                                        mirror_num,
> >                                                       physical_for_dev_replace);
> > ---
> > 
> > and then the whole callchain of copy_nocow_pages continues.
> 
> Understood.
> I could go this method.
> 
> However I'm a little concerned about such "if (0 &&" usage.
> 
> Although with gcc 8.1 it works without any warning, it still looks
> pretty strange, as compiler could one day detect such dead branch and
> find copy_nocow_pages() is never used.

I've checked that this does not produce any warnings, gcc 7.3.1. The
condition looks strange but does what we want. The whole series will be
in 4.18, the first patch in stable versions. If gcc does not warn today,
it will not in the future in any of the versions.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace
  2018-06-05 14:24           ` David Sterba
@ 2018-06-05 14:30             ` Nikolay Borisov
  2018-06-06 12:07               ` David Sterba
  0 siblings, 1 reply; 12+ messages in thread
From: Nikolay Borisov @ 2018-06-05 14:30 UTC (permalink / raw)
  To: dsterba, Qu Wenruo, Qu Wenruo, Qu Wenruo, linux-btrfs



On  5.06.2018 17:24, David Sterba wrote:
> On Tue, Jun 05, 2018 at 10:14:51PM +0800, Qu Wenruo wrote:
>>>> The previous version (a completely different direction though) is much
>>>> smaller.
>>>> https://patchwork.kernel.org/patch/10440541/
>>>>
>>>> However personally speaking, I still prefer this one, as it's much simpler.
>>>
>>> As this will go to older stable kernels, I'd rather split that to more
>>> patches where the first one is
>>>
>>> --- a/fs/btrfs/scrub.c
>>> +++ b/fs/btrfs/scrub.c
>>> @@ -2799,7 +2799,7 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
>>>                         have_csum = scrub_find_csum(sctx, logical, csum);
>>>                         if (have_csum == 0)
>>>                                 ++sctx->stat.no_csum;
>>> -                       if (sctx->is_dev_replace && !have_csum) {
>>> +                       if (0 && sctx->is_dev_replace && !have_csum) {
>>>                                 ret = copy_nocow_pages(sctx, logical, l,
>>>                                                        mirror_num,
>>>                                                       physical_for_dev_replace);
>>> ---
>>>
>>> and then the whole callchain of copy_nocow_pages continues.
>>
>> Understood.
>> I could go this method.
>>
>> However I'm a little concerned about such "if (0 &&" usage.
>>
>> Although with gcc 8.1 it works without any warning, it still looks
>> pretty strange, as compiler could one day detect such dead branch and
>> find copy_nocow_pages() is never used.
> 
> I've checked that this does not produce any warnings, gcc 7.3.1. The
> condition looks strange but does what we want. The whole series will be
> in 4.18, the first patch in stable versions. If gcc does not warn today,
> it will not in the future in any of the versions.

WOuld it be possible to then take Qu's patch which deletes a lot of code
for 4.19?

> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace
  2018-06-05 14:30             ` Nikolay Borisov
@ 2018-06-06 12:07               ` David Sterba
  0 siblings, 0 replies; 12+ messages in thread
From: David Sterba @ 2018-06-06 12:07 UTC (permalink / raw)
  To: Nikolay Borisov; +Cc: dsterba, Qu Wenruo, Qu Wenruo, Qu Wenruo, linux-btrfs

On Tue, Jun 05, 2018 at 05:30:18PM +0300, Nikolay Borisov wrote:
> On  5.06.2018 17:24, David Sterba wrote:
> > On Tue, Jun 05, 2018 at 10:14:51PM +0800, Qu Wenruo wrote:
> >>> and then the whole callchain of copy_nocow_pages continues.
> >>
> >> Understood.
> >> I could go this method.
> >>
> >> However I'm a little concerned about such "if (0 &&" usage.
> >>
> >> Although with gcc 8.1 it works without any warning, it still looks
> >> pretty strange, as compiler could one day detect such dead branch and
> >> find copy_nocow_pages() is never used.
> > 
> > I've checked that this does not produce any warnings, gcc 7.3.1. The
> > condition looks strange but does what we want. The whole series will be
> > in 4.18, the first patch in stable versions. If gcc does not warn today,
> > it will not in the future in any of the versions.
> 
> WOuld it be possible to then take Qu's patch which deletes a lot of code
> for 4.19?

Of course, the deletion part will follow, we just need to split it so
the patches do not cause backporting conflicts.  Reading what I wrote
above, it's not clearl but it was the intention.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace
  2018-06-05  4:36 [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace Qu Wenruo
  2018-06-05  5:34 ` Qu Wenruo
@ 2018-06-07  1:01 ` james harvey
  1 sibling, 0 replies; 12+ messages in thread
From: james harvey @ 2018-06-07  1:01 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: Btrfs BTRFS

On Tue, Jun 5, 2018 at 12:36 AM, Qu Wenruo <wqu@suse.com> wrote:
> [BUG]
> Btrfs can easily create compressed extent without checksum (even
> though it shouldn't), and if we then try to replace device containing
> such extent, the result device will contain all the uncompressed data
> instead of the compressed one.
>
> Test case already submitted to fstests:
> https://patchwork.kernel.org/patch/10442353/
>
> [CAUSE]
> When handling compressed extent without checksum, device replace will
> goes into copy_nocow_pages() function.
>
> In that function, btrfs will get all inodes referring to this data
> extents and then use find_or_create_page() to get pages direct from that
> inode.
>
> The problem here is, pages directly from inode are always uncompressed.
> And for compressed data extent, they mismatch with on-disk data.
> Thus this leads to corrupted compressed data extent written to replace
> device.
>
> [FIX]
> In this attempt, we could just remove the "optimization" branch, and let
> unified scrub_pages() to handle it.
>
> Although scrub_pages() won't bother reusing page cache, it will be a
> little slower, but it does the correct csum checking and won't cause
> such data corruption cause by "optimization".
>
> Reported-by: James Harvey <jamespharvey20@gmail.com>
> Signed-off-by: Qu Wenruo <wqu@suse.com>

Reviewed-by: James Harvey <jamespharvey20@gmail.com>



As expected, this fixes the problem.

I originally ran into this running btrfs replace on devid 1, on a 3
device RAID1.

I temporarily restored dd images of devid 2 & 3, from before I ran the
original replace.  I was able to run "btrfs replace" with a btrfs
module loaded with this patch.

There's definitely no corruption.  I excessively checked, multiple ways.

I haven't tried with the previous RFC patch since it looks like this
one is getting used instead, but if requested to, I'd try that one.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace
  2018-06-05 14:14         ` Qu Wenruo
  2018-06-05 14:24           ` David Sterba
@ 2018-06-09  0:33           ` David Sterba
  2018-06-09  7:09             ` Qu Wenruo
  1 sibling, 1 reply; 12+ messages in thread
From: David Sterba @ 2018-06-09  0:33 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: dsterba, Qu Wenruo, Qu Wenruo, linux-btrfs

On Tue, Jun 05, 2018 at 10:14:51PM +0800, Qu Wenruo wrote:
> 
> 
> On 2018年06月05日 22:07, David Sterba wrote:
> > On Tue, Jun 05, 2018 at 09:47:46PM +0800, Qu Wenruo wrote:
> >>
> >>
> >> On 2018年06月05日 21:42, David Sterba wrote:
> >>> On Tue, Jun 05, 2018 at 01:34:03PM +0800, Qu Wenruo wrote:
> >>>> Hi David,
> >>>>
> >>>> It would be pretty nice if we could get this fix (or previous RFC patch)
> >>>> to get into current release cycle.
> >>>>
> >>>> As it's a unrecoverable data corruption, it would be better to get it
> >>>> fixed as soon as possible.
> >>>
> >>> That we can do, I'm planning to send 2nd pull by the end of the next
> >>> week as there's at least one patch in the queue now.
> >>>
> >>> This patch seems to big, can you please prepare a minimal version?
> >>
> >> The previous version (a completely different direction though) is much
> >> smaller.
> >> https://patchwork.kernel.org/patch/10440541/
> >>
> >> However personally speaking, I still prefer this one, as it's much simpler.
> > 
> > As this will go to older stable kernels, I'd rather split that to more
> > patches where the first one is
> > 
> > --- a/fs/btrfs/scrub.c
> > +++ b/fs/btrfs/scrub.c
> > @@ -2799,7 +2799,7 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
> >                         have_csum = scrub_find_csum(sctx, logical, csum);
> >                         if (have_csum == 0)
> >                                 ++sctx->stat.no_csum;
> > -                       if (sctx->is_dev_replace && !have_csum) {
> > +                       if (0 && sctx->is_dev_replace && !have_csum) {
> >                                 ret = copy_nocow_pages(sctx, logical, l,
> >                                                        mirror_num,
> >                                                       physical_for_dev_replace);
> > ---
> > 
> > and then the whole callchain of copy_nocow_pages continues.
> 
> Understood.
> I could go this method.

FYI, I'd need to send the 2nd pull request on Tuesday so I'm adding the
proposed fix with the current changelog to the queue now.

https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git/commit/?h=next-fixes&id=8c83e0b1b20b094491bec6c52839aa3596a87f03

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace
  2018-06-09  0:33           ` David Sterba
@ 2018-06-09  7:09             ` Qu Wenruo
  0 siblings, 0 replies; 12+ messages in thread
From: Qu Wenruo @ 2018-06-09  7:09 UTC (permalink / raw)
  To: dsterba, Qu Wenruo, Qu Wenruo, linux-btrfs


[-- Attachment #1.1: Type: text/plain, Size: 2498 bytes --]



On 2018年06月09日 08:33, David Sterba wrote:
> On Tue, Jun 05, 2018 at 10:14:51PM +0800, Qu Wenruo wrote:
>>
>>
>> On 2018年06月05日 22:07, David Sterba wrote:
>>> On Tue, Jun 05, 2018 at 09:47:46PM +0800, Qu Wenruo wrote:
>>>>
>>>>
>>>> On 2018年06月05日 21:42, David Sterba wrote:
>>>>> On Tue, Jun 05, 2018 at 01:34:03PM +0800, Qu Wenruo wrote:
>>>>>> Hi David,
>>>>>>
>>>>>> It would be pretty nice if we could get this fix (or previous RFC patch)
>>>>>> to get into current release cycle.
>>>>>>
>>>>>> As it's a unrecoverable data corruption, it would be better to get it
>>>>>> fixed as soon as possible.
>>>>>
>>>>> That we can do, I'm planning to send 2nd pull by the end of the next
>>>>> week as there's at least one patch in the queue now.
>>>>>
>>>>> This patch seems to big, can you please prepare a minimal version?
>>>>
>>>> The previous version (a completely different direction though) is much
>>>> smaller.
>>>> https://patchwork.kernel.org/patch/10440541/
>>>>
>>>> However personally speaking, I still prefer this one, as it's much simpler.
>>>
>>> As this will go to older stable kernels, I'd rather split that to more
>>> patches where the first one is
>>>
>>> --- a/fs/btrfs/scrub.c
>>> +++ b/fs/btrfs/scrub.c
>>> @@ -2799,7 +2799,7 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
>>>                         have_csum = scrub_find_csum(sctx, logical, csum);
>>>                         if (have_csum == 0)
>>>                                 ++sctx->stat.no_csum;
>>> -                       if (sctx->is_dev_replace && !have_csum) {
>>> +                       if (0 && sctx->is_dev_replace && !have_csum) {
>>>                                 ret = copy_nocow_pages(sctx, logical, l,
>>>                                                        mirror_num,
>>>                                                       physical_for_dev_replace);
>>> ---
>>>
>>> and then the whole callchain of copy_nocow_pages continues.
>>
>> Understood.
>> I could go this method.
> 
> FYI, I'd need to send the 2nd pull request on Tuesday so I'm adding the
> proposed fix with the current changelog to the queue now.
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git/commit/?h=next-fixes&id=8c83e0b1b20b094491bec6c52839aa3596a87f03

The note looks better than the final version I sent.
https://patchwork.kernel.org/patch/10449605/

Glad to see it merged.

Thanks,
Qu


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2018-06-09  7:10 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-05  4:36 [PATCH RFC ver.B] btrfs: scrub: Don't use inode pages for device replace Qu Wenruo
2018-06-05  5:34 ` Qu Wenruo
2018-06-05 13:42   ` David Sterba
2018-06-05 13:47     ` Qu Wenruo
2018-06-05 14:07       ` David Sterba
2018-06-05 14:14         ` Qu Wenruo
2018-06-05 14:24           ` David Sterba
2018-06-05 14:30             ` Nikolay Borisov
2018-06-06 12:07               ` David Sterba
2018-06-09  0:33           ` David Sterba
2018-06-09  7:09             ` Qu Wenruo
2018-06-07  1:01 ` james harvey

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.