* [PATCH] btrfs: scrub: fix failed to detect checksum error
@ 2022-11-06 14:35 Li Zhang
2022-11-06 22:57 ` Qu Wenruo
0 siblings, 1 reply; 3+ messages in thread
From: Li Zhang @ 2022-11-06 14:35 UTC (permalink / raw)
To: linux-btrfs; +Cc: Li Zhang
[bug]
Scrub fails to detect checksum errors
To reproduce the problem:
$ truncate -s 250M loop_dev1
$ truncate -s 250M loop_dev2
$ losetup /dev/loop1 loop_dev1
$ losetup /dev/loop2 loop_dev2
$ mkfs.btrfs -mraid1 -draid1 /dev/loop1 /dev/loop2 -f
$ mount /dev/loop1 /mnt/
$ cp ~/btrfs/btrfs-progs/mkfs/main.c /mnt/
$ vim -b loop_dev1
....
free(label);
free(source_dir);
exit(1);
success:
exit(0);
}zhangli
....
$ sudo btrfs scrub start /mnt/
fsid:b66aa912-ae76-4b4b-881b-6a6840f06870 sock_path:/var/lib/btrfs/scrub.progress.b66aa912-ae76-4b4b-881b-6a6840f06870.
scrub started on /mnt/, fsid b66aa912-ae76-4b4b-881b-6a6840f06870 (pid=9894)
$ sudo btrfs scrub status /mnt/
UUID: b66aa912-ae76-4b4b-881b-6a6840f06870
Scrub started: Sun Nov 6 21:51:50 2022
Status: finished
Duration: 0:00:00
Total to scrub: 392.00KiB
Rate: 0.00B/s
Error summary: no errors found
[reason]
Because scrub only checks the first sector (scrub_sector) of
the sblock (scrub_block), it does not check other sectors.
[implementation]
1. Move the set sector (scrub_sector) csum from scrub_extent
to scrub_sectors, since each sector has an independent checksum.
2. In the scrub_checksum_data function, check all
sectors in the sblock.
3. In the scrub_setup_recheck_block function,
use sector_index to set the sector csum.
[test]
The test method is the same as the problem reproduction.
Can detect checksum errors and fix checksum errors
Below is the scrub output.
$ sudo btrfs scrub start /mnt/
fsid:b66aa912-ae76-4b4b-881b-6a6840f06870 sock_path:/var/lib/btrfs/scrub.progress.b66aa912-ae76-4b4b-881b-6a6840f06870.
scrub started on /mnt/, fsid b66aa912-ae76-4b4b-881b-6a6840f06870 (pid=11089)
$ sudo btrfs scrub status /mnt/WARNING: errors detected during scrubbing, corrected
UUID: b66aa912-ae76-4b4b-881b-6a6840f06870
Scrub started: Sun Nov 6 22:15:02 2022
Status: finished
Duration: 0:00:00
Total to scrub: 392.00KiB
Rate: 0.00B/s
Error summary: csum=1
Corrected: 1
Uncorrectable: 0
Unverified: 0
Signed-off-by: Li Zhang <zhanglikernel@gmail.com>
---
Issue: 537
fs/btrfs/scrub.c | 58 ++++++++++++++++++++++++++++----------------------------
1 file changed, 29 insertions(+), 29 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f260c53..56ee600 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -404,7 +404,7 @@ static int scrub_write_sector_to_dev_replace(struct scrub_block *sblock,
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
u64 physical, struct btrfs_device *dev, u64 flags,
- u64 gen, int mirror_num, u8 *csum,
+ u64 gen, int mirror_num,
u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct work_struct *work);
@@ -420,6 +420,8 @@ static int scrub_add_sector_to_wr_bio(struct scrub_ctx *sctx,
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct work_struct *work);
static void scrub_put_ctx(struct scrub_ctx *sctx);
+static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum);
+
static inline int scrub_is_page_on_raid56(struct scrub_sector *sector)
{
@@ -1516,7 +1518,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
sector->have_csum = have_csum;
if (have_csum)
memcpy(sector->csum,
- original_sblock->sectors[0]->csum,
+ original_sblock->sectors[sector_index]->csum,
sctx->fs_info->csum_size);
scrub_stripe_index_and_offset(logical,
@@ -1984,21 +1986,22 @@ static int scrub_checksum_data(struct scrub_block *sblock)
u8 csum[BTRFS_CSUM_SIZE];
struct scrub_sector *sector;
char *kaddr;
+ int i;
BUG_ON(sblock->sector_count < 1);
- sector = sblock->sectors[0];
- if (!sector->have_csum)
- return 0;
-
- kaddr = scrub_sector_get_kaddr(sector);
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
+ for (i = 0; i < sblock->sector_count; i++) {
+ sector = sblock->sectors[i];
+ if (!sector->have_csum)
+ continue;
- crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
-
- if (memcmp(csum, sector->csum, fs_info->csum_size))
- sblock->checksum_error = 1;
+ kaddr = scrub_sector_get_kaddr(sector);
+ crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
+ if (memcmp(csum, sector->csum, fs_info->csum_size))
+ sblock->checksum_error = 1;
+ }
return sblock->checksum_error;
}
@@ -2400,12 +2403,14 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
u64 physical, struct btrfs_device *dev, u64 flags,
- u64 gen, int mirror_num, u8 *csum,
+ u64 gen, int mirror_num,
u64 physical_for_dev_replace)
{
struct scrub_block *sblock;
const u32 sectorsize = sctx->fs_info->sectorsize;
int index;
+ u8 csum[BTRFS_CSUM_SIZE];
+ int have_csum;
sblock = alloc_scrub_block(sctx, dev, logical, physical,
physical_for_dev_replace, mirror_num);
@@ -2424,7 +2429,6 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
* more memory for PAGE_SIZE > sectorsize case.
*/
u32 l = min(sectorsize, len);
-
sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL);
if (!sector) {
spin_lock(&sctx->stat_lock);
@@ -2435,11 +2439,16 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
}
sector->flags = flags;
sector->generation = gen;
- if (csum) {
- sector->have_csum = 1;
- memcpy(sector->csum, csum, sctx->fs_info->csum_size);
- } else {
- sector->have_csum = 0;
+ if (flags & BTRFS_EXTENT_FLAG_DATA) {
+ /* push csums to sbio */
+ have_csum = scrub_find_csum(sctx, logical, csum);
+ if (have_csum == 0) {
+ ++sctx->stat.no_csum;
+ sector->have_csum = 0;
+ } else {
+ sector->have_csum = 1;
+ memcpy(sector->csum, csum, sctx->fs_info->csum_size);
+ }
}
len -= l;
logical += l;
@@ -2669,7 +2678,6 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
u64 src_physical = physical;
int src_mirror = mirror_num;
int ret;
- u8 csum[BTRFS_CSUM_SIZE];
u32 blocksize;
/*
@@ -2715,17 +2723,9 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
&src_dev, &src_mirror);
while (len) {
u32 l = min(len, blocksize);
- int have_csum = 0;
-
- if (flags & BTRFS_EXTENT_FLAG_DATA) {
- /* push csums to sbio */
- have_csum = scrub_find_csum(sctx, logical, csum);
- if (have_csum == 0)
- ++sctx->stat.no_csum;
- }
ret = scrub_sectors(sctx, logical, l, src_physical, src_dev,
flags, gen, src_mirror,
- have_csum ? csum : NULL, physical);
+ physical);
if (ret)
return ret;
len -= l;
@@ -4155,7 +4155,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
ret = scrub_sectors(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
- NULL, bytenr);
+ bytenr);
if (ret)
return ret;
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] btrfs: scrub: fix failed to detect checksum error
2022-11-06 14:35 [PATCH] btrfs: scrub: fix failed to detect checksum error Li Zhang
@ 2022-11-06 22:57 ` Qu Wenruo
2022-11-07 15:54 ` li zhang
0 siblings, 1 reply; 3+ messages in thread
From: Qu Wenruo @ 2022-11-06 22:57 UTC (permalink / raw)
To: Li Zhang, linux-btrfs
On 2022/11/6 22:35, Li Zhang wrote:
> [bug]
> Scrub fails to detect checksum errors
> To reproduce the problem:
>
> $ truncate -s 250M loop_dev1
> $ truncate -s 250M loop_dev2
> $ losetup /dev/loop1 loop_dev1
> $ losetup /dev/loop2 loop_dev2
> $ mkfs.btrfs -mraid1 -draid1 /dev/loop1 /dev/loop2 -f
> $ mount /dev/loop1 /mnt/
> $ cp ~/btrfs/btrfs-progs/mkfs/main.c /mnt/
>
> $ vim -b loop_dev1
>
> ....
> free(label);
> free(source_dir);
> exit(1);
> success:
> exit(0);
> }zhangli
>
> ....
>
> $ sudo btrfs scrub start /mnt/
> fsid:b66aa912-ae76-4b4b-881b-6a6840f06870 sock_path:/var/lib/btrfs/scrub.progress.b66aa912-ae76-4b4b-881b-6a6840f06870.
> scrub started on /mnt/, fsid b66aa912-ae76-4b4b-881b-6a6840f06870 (pid=9894)
>
> $ sudo btrfs scrub status /mnt/
> UUID: b66aa912-ae76-4b4b-881b-6a6840f06870
> Scrub started: Sun Nov 6 21:51:50 2022
> Status: finished
> Duration: 0:00:00
> Total to scrub: 392.00KiB
> Rate: 0.00B/s
> Error summary: no errors found
>
> [reason]
> Because scrub only checks the first sector (scrub_sector) of
> the sblock (scrub_block), it does not check other sectors.
That's caused by commit 786672e9e1a3 ("btrfs: scrub: use larger block
size for data extent scrub"), which enlarged data scrub extent size
(previously always sectorsize, thus there will only be one sector per
scrub_block, thus it always works before that commit).
I'd prefer a revert before we have better fix.
>
> [implementation]
> 1. Move the set sector (scrub_sector) csum from scrub_extent
> to scrub_sectors, since each sector has an independent checksum.
> 2. In the scrub_checksum_data function, check all
> sectors in the sblock.
> 3. In the scrub_setup_recheck_block function,
> use sector_index to set the sector csum.
>
> [test]
> The test method is the same as the problem reproduction.
> Can detect checksum errors and fix checksum errors
> Below is the scrub output.
>
> $ sudo btrfs scrub start /mnt/
> fsid:b66aa912-ae76-4b4b-881b-6a6840f06870 sock_path:/var/lib/btrfs/scrub.progress.b66aa912-ae76-4b4b-881b-6a6840f06870.
> scrub started on /mnt/, fsid b66aa912-ae76-4b4b-881b-6a6840f06870 (pid=11089)
> $ sudo btrfs scrub status /mnt/WARNING: errors detected during scrubbing, corrected
>
> UUID: b66aa912-ae76-4b4b-881b-6a6840f06870
> Scrub started: Sun Nov 6 22:15:02 2022
> Status: finished
> Duration: 0:00:00
> Total to scrub: 392.00KiB
> Rate: 0.00B/s
> Error summary: csum=1
> Corrected: 1
> Uncorrectable: 0
> Unverified: 0
>
> Signed-off-by: Li Zhang <zhanglikernel@gmail.com>
> ---
> Issue: 537
>
> fs/btrfs/scrub.c | 58 ++++++++++++++++++++++++++++----------------------------
> 1 file changed, 29 insertions(+), 29 deletions(-)
>
> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> index f260c53..56ee600 100644
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -404,7 +404,7 @@ static int scrub_write_sector_to_dev_replace(struct scrub_block *sblock,
> static void scrub_parity_put(struct scrub_parity *sparity);
> static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
> u64 physical, struct btrfs_device *dev, u64 flags,
> - u64 gen, int mirror_num, u8 *csum,
> + u64 gen, int mirror_num,
> u64 physical_for_dev_replace);
> static void scrub_bio_end_io(struct bio *bio);
> static void scrub_bio_end_io_worker(struct work_struct *work);
> @@ -420,6 +420,8 @@ static int scrub_add_sector_to_wr_bio(struct scrub_ctx *sctx,
> static void scrub_wr_bio_end_io(struct bio *bio);
> static void scrub_wr_bio_end_io_worker(struct work_struct *work);
> static void scrub_put_ctx(struct scrub_ctx *sctx);
> +static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum);
> +
I don't think this is the way to go, since we can have a extent up to
STRIPE_LEN, going csum search again and again is not the proper way to go.
We need a function to grab a batch of csum in just one go.
>
> static inline int scrub_is_page_on_raid56(struct scrub_sector *sector)
> {
> @@ -1516,7 +1518,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
> sector->have_csum = have_csum;
> if (have_csum)
> memcpy(sector->csum,
> - original_sblock->sectors[0]->csum,
> + original_sblock->sectors[sector_index]->csum,
> sctx->fs_info->csum_size);
>
> scrub_stripe_index_and_offset(logical,
> @@ -1984,21 +1986,22 @@ static int scrub_checksum_data(struct scrub_block *sblock)
> u8 csum[BTRFS_CSUM_SIZE];
> struct scrub_sector *sector;
> char *kaddr;
> + int i;
>
> BUG_ON(sblock->sector_count < 1);
> - sector = sblock->sectors[0];
> - if (!sector->have_csum)
> - return 0;
> -
> - kaddr = scrub_sector_get_kaddr(sector);
>
> shash->tfm = fs_info->csum_shash;
> crypto_shash_init(shash);
> + for (i = 0; i < sblock->sector_count; i++) {
> + sector = sblock->sectors[i];
> + if (!sector->have_csum)
> + continue;
>
> - crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
> -
> - if (memcmp(csum, sector->csum, fs_info->csum_size))
> - sblock->checksum_error = 1;
> + kaddr = scrub_sector_get_kaddr(sector);
> + crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
> + if (memcmp(csum, sector->csum, fs_info->csum_size))
> + sblock->checksum_error = 1;
That would only increase checksum error by 1, but we may have multiple
corruptions in that extent.
This hotfix is going to screw up scrub error reporting.
Thanks,
Qu
> + }
> return sblock->checksum_error;
> }
>
> @@ -2400,12 +2403,14 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
>
> static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
> u64 physical, struct btrfs_device *dev, u64 flags,
> - u64 gen, int mirror_num, u8 *csum,
> + u64 gen, int mirror_num,
> u64 physical_for_dev_replace)
> {
> struct scrub_block *sblock;
> const u32 sectorsize = sctx->fs_info->sectorsize;
> int index;
> + u8 csum[BTRFS_CSUM_SIZE];
> + int have_csum;
>
> sblock = alloc_scrub_block(sctx, dev, logical, physical,
> physical_for_dev_replace, mirror_num);
> @@ -2424,7 +2429,6 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
> * more memory for PAGE_SIZE > sectorsize case.
> */
> u32 l = min(sectorsize, len);
> -
> sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL);
> if (!sector) {
> spin_lock(&sctx->stat_lock);
> @@ -2435,11 +2439,16 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
> }
> sector->flags = flags;
> sector->generation = gen;
> - if (csum) {
> - sector->have_csum = 1;
> - memcpy(sector->csum, csum, sctx->fs_info->csum_size);
> - } else {
> - sector->have_csum = 0;
> + if (flags & BTRFS_EXTENT_FLAG_DATA) {
> + /* push csums to sbio */
> + have_csum = scrub_find_csum(sctx, logical, csum);
> + if (have_csum == 0) {
> + ++sctx->stat.no_csum;
> + sector->have_csum = 0;
> + } else {
> + sector->have_csum = 1;
> + memcpy(sector->csum, csum, sctx->fs_info->csum_size);
> + }
> }
> len -= l;
> logical += l;
> @@ -2669,7 +2678,6 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
> u64 src_physical = physical;
> int src_mirror = mirror_num;
> int ret;
> - u8 csum[BTRFS_CSUM_SIZE];
> u32 blocksize;
>
> /*
> @@ -2715,17 +2723,9 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
> &src_dev, &src_mirror);
> while (len) {
> u32 l = min(len, blocksize);
> - int have_csum = 0;
> -
> - if (flags & BTRFS_EXTENT_FLAG_DATA) {
> - /* push csums to sbio */
> - have_csum = scrub_find_csum(sctx, logical, csum);
> - if (have_csum == 0)
> - ++sctx->stat.no_csum;
> - }
> ret = scrub_sectors(sctx, logical, l, src_physical, src_dev,
> flags, gen, src_mirror,
> - have_csum ? csum : NULL, physical);
> + physical);
> if (ret)
> return ret;
> len -= l;
> @@ -4155,7 +4155,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
>
> ret = scrub_sectors(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
> scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
> - NULL, bytenr);
> + bytenr);
> if (ret)
> return ret;
> }
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] btrfs: scrub: fix failed to detect checksum error
2022-11-06 22:57 ` Qu Wenruo
@ 2022-11-07 15:54 ` li zhang
0 siblings, 0 replies; 3+ messages in thread
From: li zhang @ 2022-11-07 15:54 UTC (permalink / raw)
To: Qu Wenruo; +Cc: linux-btrfs
Yes, I agree.
Would begin to work on it
Thanks,
Li
Qu Wenruo <quwenruo.btrfs@gmx.com> 于2022年11月7日周一 06:57写道:
>
>
>
> On 2022/11/6 22:35, Li Zhang wrote:
> > [bug]
> > Scrub fails to detect checksum errors
> > To reproduce the problem:
> >
> > $ truncate -s 250M loop_dev1
> > $ truncate -s 250M loop_dev2
> > $ losetup /dev/loop1 loop_dev1
> > $ losetup /dev/loop2 loop_dev2
> > $ mkfs.btrfs -mraid1 -draid1 /dev/loop1 /dev/loop2 -f
> > $ mount /dev/loop1 /mnt/
> > $ cp ~/btrfs/btrfs-progs/mkfs/main.c /mnt/
> >
> > $ vim -b loop_dev1
> >
> > ....
> > free(label);
> > free(source_dir);
> > exit(1);
> > success:
> > exit(0);
> > }zhangli
> >
> > ....
> >
> > $ sudo btrfs scrub start /mnt/
> > fsid:b66aa912-ae76-4b4b-881b-6a6840f06870 sock_path:/var/lib/btrfs/scrub.progress.b66aa912-ae76-4b4b-881b-6a6840f06870.
> > scrub started on /mnt/, fsid b66aa912-ae76-4b4b-881b-6a6840f06870 (pid=9894)
> >
> > $ sudo btrfs scrub status /mnt/
> > UUID: b66aa912-ae76-4b4b-881b-6a6840f06870
> > Scrub started: Sun Nov 6 21:51:50 2022
> > Status: finished
> > Duration: 0:00:00
> > Total to scrub: 392.00KiB
> > Rate: 0.00B/s
> > Error summary: no errors found
> >
> > [reason]
> > Because scrub only checks the first sector (scrub_sector) of
> > the sblock (scrub_block), it does not check other sectors.
>
> That's caused by commit 786672e9e1a3 ("btrfs: scrub: use larger block
> size for data extent scrub"), which enlarged data scrub extent size
> (previously always sectorsize, thus there will only be one sector per
> scrub_block, thus it always works before that commit).
>
> I'd prefer a revert before we have better fix.
>
> >
> > [implementation]
> > 1. Move the set sector (scrub_sector) csum from scrub_extent
> > to scrub_sectors, since each sector has an independent checksum.
> > 2. In the scrub_checksum_data function, check all
> > sectors in the sblock.
> > 3. In the scrub_setup_recheck_block function,
> > use sector_index to set the sector csum.
> >
> > [test]
> > The test method is the same as the problem reproduction.
> > Can detect checksum errors and fix checksum errors
> > Below is the scrub output.
> >
> > $ sudo btrfs scrub start /mnt/
> > fsid:b66aa912-ae76-4b4b-881b-6a6840f06870 sock_path:/var/lib/btrfs/scrub.progress.b66aa912-ae76-4b4b-881b-6a6840f06870.
> > scrub started on /mnt/, fsid b66aa912-ae76-4b4b-881b-6a6840f06870 (pid=11089)
> > $ sudo btrfs scrub status /mnt/WARNING: errors detected during scrubbing, corrected
> >
> > UUID: b66aa912-ae76-4b4b-881b-6a6840f06870
> > Scrub started: Sun Nov 6 22:15:02 2022
> > Status: finished
> > Duration: 0:00:00
> > Total to scrub: 392.00KiB
> > Rate: 0.00B/s
> > Error summary: csum=1
> > Corrected: 1
> > Uncorrectable: 0
> > Unverified: 0
> >
> > Signed-off-by: Li Zhang <zhanglikernel@gmail.com>
> > ---
> > Issue: 537
> >
> > fs/btrfs/scrub.c | 58 ++++++++++++++++++++++++++++----------------------------
> > 1 file changed, 29 insertions(+), 29 deletions(-)
> >
> > diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> > index f260c53..56ee600 100644
> > --- a/fs/btrfs/scrub.c
> > +++ b/fs/btrfs/scrub.c
> > @@ -404,7 +404,7 @@ static int scrub_write_sector_to_dev_replace(struct scrub_block *sblock,
> > static void scrub_parity_put(struct scrub_parity *sparity);
> > static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
> > u64 physical, struct btrfs_device *dev, u64 flags,
> > - u64 gen, int mirror_num, u8 *csum,
> > + u64 gen, int mirror_num,
> > u64 physical_for_dev_replace);
> > static void scrub_bio_end_io(struct bio *bio);
> > static void scrub_bio_end_io_worker(struct work_struct *work);
> > @@ -420,6 +420,8 @@ static int scrub_add_sector_to_wr_bio(struct scrub_ctx *sctx,
> > static void scrub_wr_bio_end_io(struct bio *bio);
> > static void scrub_wr_bio_end_io_worker(struct work_struct *work);
> > static void scrub_put_ctx(struct scrub_ctx *sctx);
> > +static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum);
> > +
>
> I don't think this is the way to go, since we can have a extent up to
> STRIPE_LEN, going csum search again and again is not the proper way to go.
>
> We need a function to grab a batch of csum in just one go.
>
> >
> > static inline int scrub_is_page_on_raid56(struct scrub_sector *sector)
> > {
> > @@ -1516,7 +1518,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
> > sector->have_csum = have_csum;
> > if (have_csum)
> > memcpy(sector->csum,
> > - original_sblock->sectors[0]->csum,
> > + original_sblock->sectors[sector_index]->csum,
> > sctx->fs_info->csum_size);
> >
> > scrub_stripe_index_and_offset(logical,
> > @@ -1984,21 +1986,22 @@ static int scrub_checksum_data(struct scrub_block *sblock)
> > u8 csum[BTRFS_CSUM_SIZE];
> > struct scrub_sector *sector;
> > char *kaddr;
> > + int i;
> >
> > BUG_ON(sblock->sector_count < 1);
> > - sector = sblock->sectors[0];
> > - if (!sector->have_csum)
> > - return 0;
> > -
> > - kaddr = scrub_sector_get_kaddr(sector);
> >
> > shash->tfm = fs_info->csum_shash;
> > crypto_shash_init(shash);
> > + for (i = 0; i < sblock->sector_count; i++) {
> > + sector = sblock->sectors[i];
> > + if (!sector->have_csum)
> > + continue;
> >
> > - crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
> > -
> > - if (memcmp(csum, sector->csum, fs_info->csum_size))
> > - sblock->checksum_error = 1;
> > + kaddr = scrub_sector_get_kaddr(sector);
> > + crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
> > + if (memcmp(csum, sector->csum, fs_info->csum_size))
> > + sblock->checksum_error = 1;
>
> That would only increase checksum error by 1, but we may have multiple
> corruptions in that extent.
>
> This hotfix is going to screw up scrub error reporting.
>
> Thanks,
> Qu
> > + }
> > return sblock->checksum_error;
> > }
> >
> > @@ -2400,12 +2403,14 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
> >
> > static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
> > u64 physical, struct btrfs_device *dev, u64 flags,
> > - u64 gen, int mirror_num, u8 *csum,
> > + u64 gen, int mirror_num,
> > u64 physical_for_dev_replace)
> > {
> > struct scrub_block *sblock;
> > const u32 sectorsize = sctx->fs_info->sectorsize;
> > int index;
> > + u8 csum[BTRFS_CSUM_SIZE];
> > + int have_csum;
> >
> > sblock = alloc_scrub_block(sctx, dev, logical, physical,
> > physical_for_dev_replace, mirror_num);
> > @@ -2424,7 +2429,6 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
> > * more memory for PAGE_SIZE > sectorsize case.
> > */
> > u32 l = min(sectorsize, len);
> > -
> > sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL);
> > if (!sector) {
> > spin_lock(&sctx->stat_lock);
> > @@ -2435,11 +2439,16 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
> > }
> > sector->flags = flags;
> > sector->generation = gen;
> > - if (csum) {
> > - sector->have_csum = 1;
> > - memcpy(sector->csum, csum, sctx->fs_info->csum_size);
> > - } else {
> > - sector->have_csum = 0;
> > + if (flags & BTRFS_EXTENT_FLAG_DATA) {
> > + /* push csums to sbio */
> > + have_csum = scrub_find_csum(sctx, logical, csum);
> > + if (have_csum == 0) {
> > + ++sctx->stat.no_csum;
> > + sector->have_csum = 0;
> > + } else {
> > + sector->have_csum = 1;
> > + memcpy(sector->csum, csum, sctx->fs_info->csum_size);
> > + }
> > }
> > len -= l;
> > logical += l;
> > @@ -2669,7 +2678,6 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
> > u64 src_physical = physical;
> > int src_mirror = mirror_num;
> > int ret;
> > - u8 csum[BTRFS_CSUM_SIZE];
> > u32 blocksize;
> >
> > /*
> > @@ -2715,17 +2723,9 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
> > &src_dev, &src_mirror);
> > while (len) {
> > u32 l = min(len, blocksize);
> > - int have_csum = 0;
> > -
> > - if (flags & BTRFS_EXTENT_FLAG_DATA) {
> > - /* push csums to sbio */
> > - have_csum = scrub_find_csum(sctx, logical, csum);
> > - if (have_csum == 0)
> > - ++sctx->stat.no_csum;
> > - }
> > ret = scrub_sectors(sctx, logical, l, src_physical, src_dev,
> > flags, gen, src_mirror,
> > - have_csum ? csum : NULL, physical);
> > + physical);
> > if (ret)
> > return ret;
> > len -= l;
> > @@ -4155,7 +4155,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
> >
> > ret = scrub_sectors(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
> > scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
> > - NULL, bytenr);
> > + bytenr);
> > if (ret)
> > return ret;
> > }
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2022-11-07 15:54 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-06 14:35 [PATCH] btrfs: scrub: fix failed to detect checksum error Li Zhang
2022-11-06 22:57 ` Qu Wenruo
2022-11-07 15:54 ` li zhang
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.