Linux-XFS Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/3] xfs_scrub: media scan entire disks
@ 2019-09-06  3:40 Darrick J. Wong
  2019-09-06  3:40 ` [PATCH 1/3] xfs_scrub: implement background mode for phase 6 Darrick J. Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:40 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

Hi all,

Enable the administrator to request a full-disk media scan so that they
can look for media problems in the free space, not just allocated file
data.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=scrub-scan-everything

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/3] xfs_scrub: implement background mode for phase 6
  2019-09-06  3:40 [PATCH 0/3] xfs_scrub: media scan entire disks Darrick J. Wong
@ 2019-09-06  3:40 ` Darrick J. Wong
  2019-09-12 23:42   ` Allison Collins
  2019-09-06  3:40 ` [PATCH 2/3] xfs_scrub: perform media scans of entire devices Darrick J. Wong
  2019-09-06  3:40 ` [PATCH 3/3] xfs_scrub: relabel verified data block counts in output Darrick J. Wong
  2 siblings, 1 reply; 8+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:40 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Phase 6 doesn't implement background mode.  Unlike every other phase, it
doesn't run in single-threaded mode when -b is given once, and it doesn't
sleep between calls when -b is given more than once.  Wire up the
necessary pieces to make it behave like the man page says it should.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/read_verify.c |   21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)


diff --git a/scrub/read_verify.c b/scrub/read_verify.c
index 834571a7..414d25a6 100644
--- a/scrub/read_verify.c
+++ b/scrub/read_verify.c
@@ -32,7 +32,19 @@
  * because that's the biggest SCSI VERIFY(16) we dare to send.
  */
 #define RVP_IO_MAX_SIZE		(33554432)
-#define RVP_IO_MAX_SECTORS	(RVP_IO_MAX_SIZE >> BBSHIFT)
+
+/*
+ * If we're running in the background then we perform IO in 128k chunks
+ * to reduce the load on the IO subsystem.
+ */
+#define RVP_BACKGROUND_IO_MAX_SIZE	(131072)
+
+/* What's the real maximum IO size? */
+static inline unsigned int
+rvp_io_max_size(void)
+{
+	return bg_mode > 0 ? RVP_BACKGROUND_IO_MAX_SIZE : RVP_IO_MAX_SIZE;
+}
 
 /* Tolerate 64k holes in adjacent read verify requests. */
 #define RVP_IO_BATCH_LOCALITY	(65536)
@@ -84,7 +96,7 @@ read_verify_pool_alloc(
 	 */
 	if (miniosz % disk->d_lbasize)
 		return EINVAL;
-	if (RVP_IO_MAX_SIZE % miniosz)
+	if (rvp_io_max_size() % miniosz)
 		return EINVAL;
 
 	rvp = calloc(1, sizeof(struct read_verify_pool));
@@ -92,7 +104,7 @@ read_verify_pool_alloc(
 		return errno;
 
 	ret = posix_memalign((void **)&rvp->readbuf, page_size,
-			RVP_IO_MAX_SIZE);
+			rvp_io_max_size());
 	if (ret)
 		goto out_free;
 	ret = ptcounter_alloc(verifier_threads, &rvp->verified_bytes);
@@ -177,7 +189,7 @@ read_verify(
 	if (rvp->errors_seen)
 		return;
 
-	io_max_size = RVP_IO_MAX_SIZE;
+	io_max_size = rvp_io_max_size();
 
 	while (rv->io_length > 0) {
 		io_error = 0;
@@ -253,6 +265,7 @@ read_verify(
 			verified += sz;
 		rv->io_start += sz;
 		rv->io_length -= sz;
+		background_sleep();
 	}
 
 	free(rv);


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/3] xfs_scrub: perform media scans of entire devices
  2019-09-06  3:40 [PATCH 0/3] xfs_scrub: media scan entire disks Darrick J. Wong
  2019-09-06  3:40 ` [PATCH 1/3] xfs_scrub: implement background mode for phase 6 Darrick J. Wong
@ 2019-09-06  3:40 ` Darrick J. Wong
  2019-09-12 23:42   ` Allison Collins
  2019-09-06  3:40 ` [PATCH 3/3] xfs_scrub: relabel verified data block counts in output Darrick J. Wong
  2 siblings, 1 reply; 8+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:40 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Add a new feature to xfs_scrub where specifying multiple -x will cause
it to perform a media scan of the entire disk, not just the file data
areas.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 man/man8/xfs_scrub.8 |    3 +++
 scrub/phase6.c       |   60 ++++++++++++++++++++++++++++++++++++++++++++++----
 scrub/phase7.c       |    5 ++++
 scrub/xfs_scrub.c    |    4 ++-
 scrub/xfs_scrub.h    |    1 +
 5 files changed, 66 insertions(+), 7 deletions(-)


diff --git a/man/man8/xfs_scrub.8 b/man/man8/xfs_scrub.8
index 18948a4e..872a088c 100644
--- a/man/man8/xfs_scrub.8
+++ b/man/man8/xfs_scrub.8
@@ -97,6 +97,9 @@ Prints the version number and exits.
 .TP
 .B \-x
 Read all file data extents to look for disk errors.
+If this option is given more than once, scrub all disk contents.
+If this option is given more than twice, report errors even if they have not
+yet caused data loss.
 .B xfs_scrub
 will issue O_DIRECT reads to the block device directly.
 If the block device is a SCSI disk, it will instead issue READ VERIFY commands
diff --git a/scrub/phase6.c b/scrub/phase6.c
index c50fb8fb..7bfb856a 100644
--- a/scrub/phase6.c
+++ b/scrub/phase6.c
@@ -167,7 +167,9 @@ report_data_loss(
 	int				ret;
 
 	/* Only report errors for real extents. */
-	if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
+	if (scrub_data < 3 && (bmap->bm_flags & BMV_OF_PREALLOC))
+		return true;
+	if (bmap->bm_flags & BMV_OF_DELALLOC)
 		return true;
 
 	if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
@@ -355,7 +357,7 @@ ioerr_fsmap_report(
 	uint64_t		err_off;
 
 	/* Don't care about unwritten extents. */
-	if (map->fmr_flags & FMR_OF_PREALLOC)
+	if (scrub_data < 3 && (map->fmr_flags & FMR_OF_PREALLOC))
 		return true;
 
 	if (err_physical > map->fmr_physical)
@@ -602,6 +604,49 @@ clean_pool(
 	return ret;
 }
 
+/* Schedule an entire disk for read verification. */
+static int
+verify_entire_disk(
+	struct read_verify_pool		*rvp,
+	struct disk			*disk,
+	struct media_verify_state	*vs)
+{
+	return read_verify_schedule_io(rvp, 0, disk->d_size, vs);
+}
+
+/* Scan every part of every disk. */
+static bool
+verify_all_disks(
+	struct scrub_ctx		*ctx,
+	struct media_verify_state	*vs)
+{
+	int				ret;
+
+	ret = verify_entire_disk(vs->rvp_data, ctx->datadev, vs);
+	if (ret) {
+		str_liberror(ctx, ret, _("scheduling datadev verify"));
+		return false;
+	}
+
+	if (ctx->logdev) {
+		ret = verify_entire_disk(vs->rvp_log, ctx->logdev, vs);
+		if (ret) {
+			str_liberror(ctx, ret, _("scheduling logdev verify"));
+			return false;
+		}
+	}
+
+	if (ctx->rtdev) {
+		ret = verify_entire_disk(vs->rvp_realtime, ctx->rtdev, vs);
+		if (ret) {
+			str_liberror(ctx, ret, _("scheduling rtdev verify"));
+			return false;
+		}
+	}
+
+	return true;
+}
+
 /*
  * Read verify all the file data blocks in a filesystem.  Since XFS doesn't
  * do data checksums, we trust that the underlying storage will pass back
@@ -657,7 +702,11 @@ xfs_scan_blocks(
 			goto out_logpool;
 		}
 	}
-	moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs);
+
+	if (scrub_data > 1)
+		moveon = verify_all_disks(ctx, &vs);
+	else
+		moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs);
 	if (!moveon)
 		goto out_rtpool;
 
@@ -729,8 +778,9 @@ xfs_estimate_verify_work(
 	if (!moveon)
 		return moveon;
 
-	*items = cvt_off_fsb_to_b(&ctx->mnt,
-			(d_blocks - d_bfree) + (r_blocks - r_bfree));
+	*items = cvt_off_fsb_to_b(&ctx->mnt, d_blocks + r_blocks);
+	if (scrub_data == 1)
+		*items -= cvt_off_fsb_to_b(&ctx->mnt, d_bfree + r_bfree);
 	*nr_threads = disk_heads(ctx->datadev);
 	*rshift = 20;
 	return moveon;
diff --git a/scrub/phase7.c b/scrub/phase7.c
index bc959f5b..570ceb3f 100644
--- a/scrub/phase7.c
+++ b/scrub/phase7.c
@@ -255,6 +255,11 @@ _("%.*f%s inodes counted; %.*f%s inodes checked.\n"),
 		double		b1, b2;
 		char		*b1u, *b2u;
 
+		if (scrub_data > 1) {
+			used_data = cvt_off_fsb_to_b(&ctx->mnt, d_blocks);
+			used_rt = cvt_off_fsb_to_b(&ctx->mnt, r_blocks);
+		}
+
 		b1 = auto_space_units(used_data + used_rt, &b1u);
 		b2 = auto_space_units(ctx->bytes_checked, &b2u);
 		fprintf(stdout,
diff --git a/scrub/xfs_scrub.c b/scrub/xfs_scrub.c
index 2d554340..46876522 100644
--- a/scrub/xfs_scrub.c
+++ b/scrub/xfs_scrub.c
@@ -139,7 +139,7 @@ unsigned int			force_nr_threads;
 bool				verbose;
 
 /* Should we scrub the data blocks? */
-static bool			scrub_data;
+int				scrub_data;
 
 /* Size of a memory page. */
 long				page_size;
@@ -666,7 +666,7 @@ main(
 			fflush(stdout);
 			return SCRUB_RET_SUCCESS;
 		case 'x':
-			scrub_data = true;
+			scrub_data++;
 			break;
 		case '?':
 			/* fall through */
diff --git a/scrub/xfs_scrub.h b/scrub/xfs_scrub.h
index 54876acb..6558bad7 100644
--- a/scrub/xfs_scrub.h
+++ b/scrub/xfs_scrub.h
@@ -21,6 +21,7 @@ extern bool			want_fstrim;
 extern bool			stderr_isatty;
 extern bool			stdout_isatty;
 extern bool			is_service;
+extern int			scrub_data;
 
 enum scrub_mode {
 	SCRUB_MODE_DRY_RUN,


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 3/3] xfs_scrub: relabel verified data block counts in output
  2019-09-06  3:40 [PATCH 0/3] xfs_scrub: media scan entire disks Darrick J. Wong
  2019-09-06  3:40 ` [PATCH 1/3] xfs_scrub: implement background mode for phase 6 Darrick J. Wong
  2019-09-06  3:40 ` [PATCH 2/3] xfs_scrub: perform media scans of entire devices Darrick J. Wong
@ 2019-09-06  3:40 ` Darrick J. Wong
  2019-09-12 23:42   ` Allison Collins
  2 siblings, 1 reply; 8+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:40 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Relabel the count of verified data blocks to make it more obvious that
we were only looking for file data.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/phase7.c    |   13 ++++++++-----
 scrub/xfs_scrub.c |    2 ++
 2 files changed, 10 insertions(+), 5 deletions(-)


diff --git a/scrub/phase7.c b/scrub/phase7.c
index 570ceb3f..2622bc45 100644
--- a/scrub/phase7.c
+++ b/scrub/phase7.c
@@ -116,6 +116,7 @@ xfs_scan_summary(
 	unsigned long long	f_free;
 	bool			moveon;
 	bool			complain;
+	bool			scrub_all = scrub_data > 1;
 	int			ip;
 	int			error;
 
@@ -244,14 +245,15 @@ _("%.*f%s inodes counted; %.*f%s inodes checked.\n"),
 	}
 
 	/*
-	 * Complain if the checked block counts are off, which
+	 * Complain if the data file verification block counts are off, which
 	 * implies an incomplete check.
 	 */
-	if (ctx->bytes_checked &&
+	if (scrub_data &&
 	    (verbose ||
 	     !within_range(ctx, used_data + used_rt,
 			ctx->bytes_checked, absdiff, 1, 10,
-			_("verified blocks")))) {
+			scrub_all ? _("verified blocks") :
+				    _("verified file data blocks")))) {
 		double		b1, b2;
 		char		*b1u, *b2u;
 
@@ -262,8 +264,9 @@ _("%.*f%s inodes counted; %.*f%s inodes checked.\n"),
 
 		b1 = auto_space_units(used_data + used_rt, &b1u);
 		b2 = auto_space_units(ctx->bytes_checked, &b2u);
-		fprintf(stdout,
-_("%.1f%s data counted; %.1f%s data verified.\n"),
+		fprintf(stdout, scrub_all ?
+_("%.1f%s data counted; %.1f%s disk media verified.\n") :
+_("%.1f%s data counted; %.1f%s file data media verified.\n"),
 				b1, b1u, b2, b2u);
 		fflush(stdout);
 	}
diff --git a/scrub/xfs_scrub.c b/scrub/xfs_scrub.c
index 46876522..89f6c96a 100644
--- a/scrub/xfs_scrub.c
+++ b/scrub/xfs_scrub.c
@@ -432,6 +432,8 @@ run_scrub_phases(
 		/* Turn on certain phases if user said to. */
 		if (sp->fn == DATASCAN_DUMMY_FN && scrub_data) {
 			sp->fn = xfs_scan_blocks;
+			if (scrub_data > 1)
+				sp->descr = _("Verify disk integrity.");
 		} else if (sp->fn == REPAIR_DUMMY_FN &&
 			   ctx->mode == SCRUB_MODE_REPAIR) {
 			sp->descr = _("Repair filesystem.");


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/3] xfs_scrub: implement background mode for phase 6
  2019-09-06  3:40 ` [PATCH 1/3] xfs_scrub: implement background mode for phase 6 Darrick J. Wong
@ 2019-09-12 23:42   ` Allison Collins
  0 siblings, 0 replies; 8+ messages in thread
From: Allison Collins @ 2019-09-12 23:42 UTC (permalink / raw)
  To: Darrick J. Wong, sandeen; +Cc: linux-xfs

Looks ok to me.
Reviewed-by: Allison Collins <allison.henderson@oracle.com>

On 9/5/19 8:40 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Phase 6 doesn't implement background mode, which means that it doesn't
> run in single-threaded mode with one -b and it doesn't sleep between
> calls with multiple -b like every other phase does.  Wire up the
> necessary pieces to make it behave like the man page says it should.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>   scrub/read_verify.c |   21 +++++++++++++++++----
>   1 file changed, 17 insertions(+), 4 deletions(-)
> 
> 
> diff --git a/scrub/read_verify.c b/scrub/read_verify.c
> index 834571a7..414d25a6 100644
> --- a/scrub/read_verify.c
> +++ b/scrub/read_verify.c
> @@ -32,7 +32,19 @@
>    * because that's the biggest SCSI VERIFY(16) we dare to send.
>    */
>   #define RVP_IO_MAX_SIZE		(33554432)
> -#define RVP_IO_MAX_SECTORS	(RVP_IO_MAX_SIZE >> BBSHIFT)
> +
> +/*
> + * If we're running in the background then we perform IO in 128k chunks
> + * to reduce the load on the IO subsystem.
> + */
> +#define RVP_BACKGROUND_IO_MAX_SIZE	(131072)
> +
> +/* What's the real maximum IO size? */
> +static inline unsigned int
> +rvp_io_max_size(void)
> +{
> +	return bg_mode > 0 ? RVP_BACKGROUND_IO_MAX_SIZE : RVP_IO_MAX_SIZE;
> +}
>   
>   /* Tolerate 64k holes in adjacent read verify requests. */
>   #define RVP_IO_BATCH_LOCALITY	(65536)
> @@ -84,7 +96,7 @@ read_verify_pool_alloc(
>   	 */
>   	if (miniosz % disk->d_lbasize)
>   		return EINVAL;
> -	if (RVP_IO_MAX_SIZE % miniosz)
> +	if (rvp_io_max_size() % miniosz)
>   		return EINVAL;
>   
>   	rvp = calloc(1, sizeof(struct read_verify_pool));
> @@ -92,7 +104,7 @@ read_verify_pool_alloc(
>   		return errno;
>   
>   	ret = posix_memalign((void **)&rvp->readbuf, page_size,
> -			RVP_IO_MAX_SIZE);
> +			rvp_io_max_size());
>   	if (ret)
>   		goto out_free;
>   	ret = ptcounter_alloc(verifier_threads, &rvp->verified_bytes);
> @@ -177,7 +189,7 @@ read_verify(
>   	if (rvp->errors_seen)
>   		return;
>   
> -	io_max_size = RVP_IO_MAX_SIZE;
> +	io_max_size = rvp_io_max_size();
>   
>   	while (rv->io_length > 0) {
>   		io_error = 0;
> @@ -253,6 +265,7 @@ read_verify(
>   			verified += sz;
>   		rv->io_start += sz;
>   		rv->io_length -= sz;
> +		background_sleep();
>   	}
>   
>   	free(rv);
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/3] xfs_scrub: perform media scans of entire devices
  2019-09-06  3:40 ` [PATCH 2/3] xfs_scrub: perform media scans of entire devices Darrick J. Wong
@ 2019-09-12 23:42   ` Allison Collins
  0 siblings, 0 replies; 8+ messages in thread
From: Allison Collins @ 2019-09-12 23:42 UTC (permalink / raw)
  To: Darrick J. Wong, sandeen; +Cc: linux-xfs

Looks OK:
Reviewed-by: Allison Collins <allison.henderson@oracle.com>

On 9/5/19 8:40 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Add a new feature to xfs_scrub where specifying multiple -x will cause
> it to perform a media scan of the entire disk, not just the file data
> areas.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>   man/man8/xfs_scrub.8 |    3 +++
>   scrub/phase6.c       |   60 ++++++++++++++++++++++++++++++++++++++++++++++----
>   scrub/phase7.c       |    5 ++++
>   scrub/xfs_scrub.c    |    4 ++-
>   scrub/xfs_scrub.h    |    1 +
>   5 files changed, 66 insertions(+), 7 deletions(-)
> 
> 
> diff --git a/man/man8/xfs_scrub.8 b/man/man8/xfs_scrub.8
> index 18948a4e..872a088c 100644
> --- a/man/man8/xfs_scrub.8
> +++ b/man/man8/xfs_scrub.8
> @@ -97,6 +97,9 @@ Prints the version number and exits.
>   .TP
>   .B \-x
>   Read all file data extents to look for disk errors.
> +If this option is given more than once, scrub all disk contents.
> +If this option is given more than twice, report errors even if they have not
> +yet caused data loss.
>   .B xfs_scrub
>   will issue O_DIRECT reads to the block device directly.
>   If the block device is a SCSI disk, it will instead issue READ VERIFY commands
> diff --git a/scrub/phase6.c b/scrub/phase6.c
> index c50fb8fb..7bfb856a 100644
> --- a/scrub/phase6.c
> +++ b/scrub/phase6.c
> @@ -167,7 +167,9 @@ report_data_loss(
>   	int				ret;
>   
>   	/* Only report errors for real extents. */
> -	if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
> +	if (scrub_data < 3 && (bmap->bm_flags & BMV_OF_PREALLOC))
> +		return true;
> +	if (bmap->bm_flags & BMV_OF_DELALLOC)
>   		return true;
>   
>   	if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
> @@ -355,7 +357,7 @@ ioerr_fsmap_report(
>   	uint64_t		err_off;
>   
>   	/* Don't care about unwritten extents. */
> -	if (map->fmr_flags & FMR_OF_PREALLOC)
> +	if (scrub_data < 3 && (map->fmr_flags & FMR_OF_PREALLOC))
>   		return true;
>   
>   	if (err_physical > map->fmr_physical)
> @@ -602,6 +604,49 @@ clean_pool(
>   	return ret;
>   }
>   
> +/* Schedule an entire disk for read verification. */
> +static int
> +verify_entire_disk(
> +	struct read_verify_pool		*rvp,
> +	struct disk			*disk,
> +	struct media_verify_state	*vs)
> +{
> +	return read_verify_schedule_io(rvp, 0, disk->d_size, vs);
> +}
> +
> +/* Scan every part of every disk. */
> +static bool
> +verify_all_disks(
> +	struct scrub_ctx		*ctx,
> +	struct media_verify_state	*vs)
> +{
> +	int				ret;
> +
> +	ret = verify_entire_disk(vs->rvp_data, ctx->datadev, vs);
> +	if (ret) {
> +		str_liberror(ctx, ret, _("scheduling datadev verify"));
> +		return false;
> +	}
> +
> +	if (ctx->logdev) {
> +		ret = verify_entire_disk(vs->rvp_log, ctx->logdev, vs);
> +		if (ret) {
> +			str_liberror(ctx, ret, _("scheduling logdev verify"));
> +			return false;
> +		}
> +	}
> +
> +	if (ctx->rtdev) {
> +		ret = verify_entire_disk(vs->rvp_realtime, ctx->rtdev, vs);
> +		if (ret) {
> +			str_liberror(ctx, ret, _("scheduling rtdev verify"));
> +			return false;
> +		}
> +	}
> +
> +	return true;
> +}
> +
>   /*
>    * Read verify all the file data blocks in a filesystem.  Since XFS doesn't
>    * do data checksums, we trust that the underlying storage will pass back
> @@ -657,7 +702,11 @@ xfs_scan_blocks(
>   			goto out_logpool;
>   		}
>   	}
> -	moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs);
> +
> +	if (scrub_data > 1)
> +		moveon = verify_all_disks(ctx, &vs);
> +	else
> +		moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs);
>   	if (!moveon)
>   		goto out_rtpool;
>   
> @@ -729,8 +778,9 @@ xfs_estimate_verify_work(
>   	if (!moveon)
>   		return moveon;
>   
> -	*items = cvt_off_fsb_to_b(&ctx->mnt,
> -			(d_blocks - d_bfree) + (r_blocks - r_bfree));
> +	*items = cvt_off_fsb_to_b(&ctx->mnt, d_blocks + r_blocks);
> +	if (scrub_data == 1)
> +		*items -= cvt_off_fsb_to_b(&ctx->mnt, d_bfree + r_bfree);
>   	*nr_threads = disk_heads(ctx->datadev);
>   	*rshift = 20;
>   	return moveon;
> diff --git a/scrub/phase7.c b/scrub/phase7.c
> index bc959f5b..570ceb3f 100644
> --- a/scrub/phase7.c
> +++ b/scrub/phase7.c
> @@ -255,6 +255,11 @@ _("%.*f%s inodes counted; %.*f%s inodes checked.\n"),
>   		double		b1, b2;
>   		char		*b1u, *b2u;
>   
> +		if (scrub_data > 1) {
> +			used_data = cvt_off_fsb_to_b(&ctx->mnt, d_blocks);
> +			used_rt = cvt_off_fsb_to_b(&ctx->mnt, r_blocks);
> +		}
> +
>   		b1 = auto_space_units(used_data + used_rt, &b1u);
>   		b2 = auto_space_units(ctx->bytes_checked, &b2u);
>   		fprintf(stdout,
> diff --git a/scrub/xfs_scrub.c b/scrub/xfs_scrub.c
> index 2d554340..46876522 100644
> --- a/scrub/xfs_scrub.c
> +++ b/scrub/xfs_scrub.c
> @@ -139,7 +139,7 @@ unsigned int			force_nr_threads;
>   bool				verbose;
>   
>   /* Should we scrub the data blocks? */
> -static bool			scrub_data;
> +int				scrub_data;
>   
>   /* Size of a memory page. */
>   long				page_size;
> @@ -666,7 +666,7 @@ main(
>   			fflush(stdout);
>   			return SCRUB_RET_SUCCESS;
>   		case 'x':
> -			scrub_data = true;
> +			scrub_data++;
>   			break;
>   		case '?':
>   			/* fall through */
> diff --git a/scrub/xfs_scrub.h b/scrub/xfs_scrub.h
> index 54876acb..6558bad7 100644
> --- a/scrub/xfs_scrub.h
> +++ b/scrub/xfs_scrub.h
> @@ -21,6 +21,7 @@ extern bool			want_fstrim;
>   extern bool			stderr_isatty;
>   extern bool			stdout_isatty;
>   extern bool			is_service;
> +extern int			scrub_data;
>   
>   enum scrub_mode {
>   	SCRUB_MODE_DRY_RUN,
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 3/3] xfs_scrub: relabel verified data block counts in output
  2019-09-06  3:40 ` [PATCH 3/3] xfs_scrub: relabel verified data block counts in output Darrick J. Wong
@ 2019-09-12 23:42   ` Allison Collins
  0 siblings, 0 replies; 8+ messages in thread
From: Allison Collins @ 2019-09-12 23:42 UTC (permalink / raw)
  To: Darrick J. Wong, sandeen; +Cc: linux-xfs

Ok, you can add my review:
Reviewed-by: Allison Collins <allison.henderson@oracle.com>

On 9/5/19 8:40 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Relabel the count of verified data blocks to make it more obvious that
> we were only looking for file data.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>   scrub/phase7.c    |   13 ++++++++-----
>   scrub/xfs_scrub.c |    2 ++
>   2 files changed, 10 insertions(+), 5 deletions(-)
> 
> 
> diff --git a/scrub/phase7.c b/scrub/phase7.c
> index 570ceb3f..2622bc45 100644
> --- a/scrub/phase7.c
> +++ b/scrub/phase7.c
> @@ -116,6 +116,7 @@ xfs_scan_summary(
>   	unsigned long long	f_free;
>   	bool			moveon;
>   	bool			complain;
> +	bool			scrub_all = scrub_data > 1;
>   	int			ip;
>   	int			error;
>   
> @@ -244,14 +245,15 @@ _("%.*f%s inodes counted; %.*f%s inodes checked.\n"),
>   	}
>   
>   	/*
> -	 * Complain if the checked block counts are off, which
> +	 * Complain if the data file verification block counts are off, which
>   	 * implies an incomplete check.
>   	 */
> -	if (ctx->bytes_checked &&
> +	if (scrub_data &&
>   	    (verbose ||
>   	     !within_range(ctx, used_data + used_rt,
>   			ctx->bytes_checked, absdiff, 1, 10,
> -			_("verified blocks")))) {
> +			scrub_all ? _("verified blocks") :
> +				    _("verified file data blocks")))) {
>   		double		b1, b2;
>   		char		*b1u, *b2u;
>   
> @@ -262,8 +264,9 @@ _("%.*f%s inodes counted; %.*f%s inodes checked.\n"),
>   
>   		b1 = auto_space_units(used_data + used_rt, &b1u);
>   		b2 = auto_space_units(ctx->bytes_checked, &b2u);
> -		fprintf(stdout,
> -_("%.1f%s data counted; %.1f%s data verified.\n"),
> +		fprintf(stdout, scrub_all ?
> +_("%.1f%s data counted; %.1f%s disk media verified.\n") :
> +_("%.1f%s data counted; %.1f%s file data media verified.\n"),
>   				b1, b1u, b2, b2u);
>   		fflush(stdout);
>   	}
> diff --git a/scrub/xfs_scrub.c b/scrub/xfs_scrub.c
> index 46876522..89f6c96a 100644
> --- a/scrub/xfs_scrub.c
> +++ b/scrub/xfs_scrub.c
> @@ -432,6 +432,8 @@ run_scrub_phases(
>   		/* Turn on certain phases if user said to. */
>   		if (sp->fn == DATASCAN_DUMMY_FN && scrub_data) {
>   			sp->fn = xfs_scan_blocks;
> +			if (scrub_data > 1)
> +				sp->descr = _("Verify disk integrity.");
>   		} else if (sp->fn == REPAIR_DUMMY_FN &&
>   			   ctx->mode == SCRUB_MODE_REPAIR) {
>   			sp->descr = _("Repair filesystem.");
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/3] xfs_scrub: perform media scans of entire devices
  2019-08-26 21:32 [PATCH 0/3] xfs_scrub: media scan entire disks Darrick J. Wong
@ 2019-08-26 21:33 ` Darrick J. Wong
  0 siblings, 0 replies; 8+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:33 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Add a new feature to xfs_scrub where specifying multiple -x will cause
it to perform a media scan of the entire disk, not just the file data
areas.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 man/man8/xfs_scrub.8 |    3 +++
 scrub/phase6.c       |   60 ++++++++++++++++++++++++++++++++++++++++++++++----
 scrub/phase7.c       |    5 ++++
 scrub/xfs_scrub.c    |    4 ++-
 scrub/xfs_scrub.h    |    1 +
 5 files changed, 66 insertions(+), 7 deletions(-)


diff --git a/man/man8/xfs_scrub.8 b/man/man8/xfs_scrub.8
index 18948a4e..872a088c 100644
--- a/man/man8/xfs_scrub.8
+++ b/man/man8/xfs_scrub.8
@@ -97,6 +97,9 @@ Prints the version number and exits.
 .TP
 .B \-x
 Read all file data extents to look for disk errors.
+If this option is given more than once, scrub all disk contents.
+If this option is given more than twice, report errors even if they have not
+yet caused data loss.
 .B xfs_scrub
 will issue O_DIRECT reads to the block device directly.
 If the block device is a SCSI disk, it will instead issue READ VERIFY commands
diff --git a/scrub/phase6.c b/scrub/phase6.c
index 1e55fad8..e6e9f954 100644
--- a/scrub/phase6.c
+++ b/scrub/phase6.c
@@ -167,7 +167,9 @@ report_data_loss(
 	int				ret;
 
 	/* Only report errors for real extents. */
-	if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
+	if (scrub_data < 3 && (bmap->bm_flags & BMV_OF_PREALLOC))
+		return true;
+	if (bmap->bm_flags & BMV_OF_DELALLOC)
 		return true;
 
 	if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
@@ -355,7 +357,7 @@ ioerr_fsmap_report(
 	uint64_t		err_off;
 
 	/* Don't care about unwritten extents. */
-	if (map->fmr_flags & FMR_OF_PREALLOC)
+	if (scrub_data < 3 && (map->fmr_flags & FMR_OF_PREALLOC))
 		return true;
 
 	if (err_physical > map->fmr_physical)
@@ -602,6 +604,49 @@ clean_pool(
 	return ret;
 }
 
+/* Schedule an entire disk for read verification. */
+static int
+verify_entire_disk(
+	struct read_verify_pool		*rvp,
+	struct disk			*disk,
+	struct media_verify_state	*vs)
+{
+	return read_verify_schedule_io(rvp, 0, disk->d_size, vs);
+}
+
+/* Scan every part of every disk. */
+static bool
+verify_all_disks(
+	struct scrub_ctx		*ctx,
+	struct media_verify_state	*vs)
+{
+	int				ret;
+
+	ret = verify_entire_disk(vs->rvp_data, ctx->datadev, vs);
+	if (ret) {
+		str_liberror(ctx, ret, _("scheduling datadev verify"));
+		return false;
+	}
+
+	if (ctx->logdev) {
+		ret = verify_entire_disk(vs->rvp_log, ctx->logdev, vs);
+		if (ret) {
+			str_liberror(ctx, ret, _("scheduling logdev verify"));
+			return false;
+		}
+	}
+
+	if (ctx->rtdev) {
+		ret = verify_entire_disk(vs->rvp_realtime, ctx->rtdev, vs);
+		if (ret) {
+			str_liberror(ctx, ret, _("scheduling rtdev verify"));
+			return false;
+		}
+	}
+
+	return true;
+}
+
 /*
  * Read verify all the file data blocks in a filesystem.  Since XFS doesn't
  * do data checksums, we trust that the underlying storage will pass back
@@ -657,7 +702,11 @@ xfs_scan_blocks(
 			goto out_logpool;
 		}
 	}
-	moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs);
+
+	if (scrub_data > 1)
+		moveon = verify_all_disks(ctx, &vs);
+	else
+		moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs);
 	if (!moveon)
 		goto out_rtpool;
 
@@ -729,8 +778,9 @@ xfs_estimate_verify_work(
 	if (!moveon)
 		return moveon;
 
-	*items = xfrog_fsb_to_b(&ctx->mnt,
-			(d_blocks - d_bfree) + (r_blocks - r_bfree));
+	*items = xfrog_fsb_to_b(&ctx->mnt, d_blocks + r_blocks);
+	if (scrub_data == 1)
+		*items -= xfrog_fsb_to_b(&ctx->mnt, d_bfree + r_bfree);
 	*nr_threads = disk_heads(ctx->datadev);
 	*rshift = 20;
 	return moveon;
diff --git a/scrub/phase7.c b/scrub/phase7.c
index cf88e30f..065a19dc 100644
--- a/scrub/phase7.c
+++ b/scrub/phase7.c
@@ -255,6 +255,11 @@ _("%.*f%s inodes counted; %.*f%s inodes checked.\n"),
 		double		b1, b2;
 		char		*b1u, *b2u;
 
+		if (scrub_data > 1) {
+			used_data = xfrog_fsb_to_b(&ctx->mnt, d_blocks);
+			used_rt = xfrog_fsb_to_b(&ctx->mnt, r_blocks);
+		}
+
 		b1 = auto_space_units(used_data + used_rt, &b1u);
 		b2 = auto_space_units(ctx->bytes_checked, &b2u);
 		fprintf(stdout,
diff --git a/scrub/xfs_scrub.c b/scrub/xfs_scrub.c
index 7749637e..c4138807 100644
--- a/scrub/xfs_scrub.c
+++ b/scrub/xfs_scrub.c
@@ -139,7 +139,7 @@ unsigned int			force_nr_threads;
 bool				verbose;
 
 /* Should we scrub the data blocks? */
-static bool			scrub_data;
+int				scrub_data;
 
 /* Size of a memory page. */
 long				page_size;
@@ -666,7 +666,7 @@ main(
 			fflush(stdout);
 			return SCRUB_RET_SUCCESS;
 		case 'x':
-			scrub_data = true;
+			scrub_data++;
 			break;
 		case '?':
 			/* fall through */
diff --git a/scrub/xfs_scrub.h b/scrub/xfs_scrub.h
index 6984d24c..9317fd3b 100644
--- a/scrub/xfs_scrub.h
+++ b/scrub/xfs_scrub.h
@@ -21,6 +21,7 @@ extern bool			want_fstrim;
 extern bool			stderr_isatty;
 extern bool			stdout_isatty;
 extern bool			is_service;
+extern int			scrub_data;
 
 enum scrub_mode {
 	SCRUB_MODE_DRY_RUN,


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, back to index

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-06  3:40 [PATCH 0/3] xfs_scrub: media scan entire disks Darrick J. Wong
2019-09-06  3:40 ` [PATCH 1/3] xfs_scrub: implement background mode for phase 6 Darrick J. Wong
2019-09-12 23:42   ` Allison Collins
2019-09-06  3:40 ` [PATCH 2/3] xfs_scrub: perform media scans of entire devices Darrick J. Wong
2019-09-12 23:42   ` Allison Collins
2019-09-06  3:40 ` [PATCH 3/3] xfs_scrub: relabel verified data block counts in output Darrick J. Wong
2019-09-12 23:42   ` Allison Collins
  -- strict thread matches above, loose matches on Subject: below --
2019-08-26 21:32 [PATCH 0/3] xfs_scrub: media scan entire disks Darrick J. Wong
2019-08-26 21:33 ` [PATCH 2/3] xfs_scrub: perform media scans of entire devices Darrick J. Wong

Linux-XFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-xfs/0 linux-xfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-xfs linux-xfs/ https://lore.kernel.org/linux-xfs \
		linux-xfs@vger.kernel.org linux-xfs@archiver.kernel.org
	public-inbox-index linux-xfs


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-xfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox