All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] xfsprogs: scrub filesystem summary counters
@ 2019-08-26 21:20 Darrick J. Wong
  2019-08-26 21:21 ` [PATCH 1/2] xfs_io: add online scrub/repair for superblock counters Darrick J. Wong
  2019-08-26 21:21 ` [PATCH 2/2] xfs_scrub: check summary counters Darrick J. Wong
  0 siblings, 2 replies; 6+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:20 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

Hi all,

This patch series introduces a totally new filesystem summary counter
online scrub feature.  Whereas previous iterations froze the filesystem
to count inodes and free blocks, this one drastically reduces overhead
by loosening its precision somewhat.  Instead of freezing the fs, we
race the expected summary counter calculation with normal fs operations
and use thresholding to check that the counters are in the ballpark,
where ballpark means "off by less than 6% or so".

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=scrub-summary-counters

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=scrub-summary-counters

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] xfs_io: add online scrub/repair for superblock counters
  2019-08-26 21:20 [PATCH 0/2] xfsprogs: scrub filesystem summary counters Darrick J. Wong
@ 2019-08-26 21:21 ` Darrick J. Wong
  2019-08-27  5:09   ` Dave Chinner
  2019-08-26 21:21 ` [PATCH 2/2] xfs_scrub: check summary counters Darrick J. Wong
  1 sibling, 1 reply; 6+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:21 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Wire up the xfs_io scrub/repair commands to the new superblock summary
counter ioctls.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 io/scrub.c |    1 +
 1 file changed, 1 insertion(+)


diff --git a/io/scrub.c b/io/scrub.c
index 052497be..b6848e5f 100644
--- a/io/scrub.c
+++ b/io/scrub.c
@@ -53,6 +53,7 @@ static const struct scrub_descr scrubbers[XFS_SCRUB_TYPE_NR] = {
 	[XFS_SCRUB_TYPE_UQUOTA]		= {"usrquota",		ST_FS},
 	[XFS_SCRUB_TYPE_GQUOTA]		= {"grpquota",		ST_FS},
 	[XFS_SCRUB_TYPE_PQUOTA]		= {"prjquota",		ST_FS},
+	[XFS_SCRUB_TYPE_FSCOUNTERS]	= {"fscounters",	ST_FS},
 };
 
 static void


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] xfs_scrub: check summary counters
  2019-08-26 21:20 [PATCH 0/2] xfsprogs: scrub filesystem summary counters Darrick J. Wong
  2019-08-26 21:21 ` [PATCH 1/2] xfs_io: add online scrub/repair for superblock counters Darrick J. Wong
@ 2019-08-26 21:21 ` Darrick J. Wong
  2019-08-27  5:27   ` Dave Chinner
  1 sibling, 1 reply; 6+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:21 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Teach scrub to ask the kernel to check and repair summary counters
during phase 7.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/phase4.c |   12 ++++++++++++
 scrub/phase7.c |   14 ++++++++++++++
 scrub/repair.c |    3 +++
 scrub/scrub.c  |   13 +++++++++++++
 scrub/scrub.h  |    2 ++
 5 files changed, 44 insertions(+)


diff --git a/scrub/phase4.c b/scrub/phase4.c
index 49f00723..c4da4852 100644
--- a/scrub/phase4.c
+++ b/scrub/phase4.c
@@ -107,6 +107,18 @@ bool
 xfs_repair_fs(
 	struct scrub_ctx		*ctx)
 {
+	bool				moveon;
+
+	/*
+	 * Check the summary counters early.  Normally we do this during phase
+	 * seven, but some of the cross-referencing requires fairly-accurate
+	 * counters, so counter repairs have to be put on the list now so that
+	 * they get fixed before we stop retrying unfixed metadata repairs.
+	 */
+	moveon = xfs_scrub_fs_summary(ctx, &ctx->action_lists[0]);
+	if (!moveon)
+		return false;
+
 	return xfs_process_action_items(ctx);
 }
 
diff --git a/scrub/phase7.c b/scrub/phase7.c
index 1c459dfc..b3156fdf 100644
--- a/scrub/phase7.c
+++ b/scrub/phase7.c
@@ -7,12 +7,15 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <sys/statvfs.h>
+#include "list.h"
 #include "path.h"
 #include "ptvar.h"
 #include "xfs_scrub.h"
 #include "common.h"
+#include "scrub.h"
 #include "fscounters.h"
 #include "spacemap.h"
+#include "repair.h"
 
 /* Phase 7: Check summary counters. */
 
@@ -91,6 +94,7 @@ xfs_scan_summary(
 	struct scrub_ctx	*ctx)
 {
 	struct summary_counts	totalcount = {0};
+	struct xfs_action_list	alist;
 	struct ptvar		*ptvar;
 	unsigned long long	used_data;
 	unsigned long long	used_rt;
@@ -110,6 +114,16 @@ xfs_scan_summary(
 	int			ip;
 	int			error;
 
+	/* Check and fix the fs summary counters. */
+	xfs_action_list_init(&alist);
+	moveon = xfs_scrub_fs_summary(ctx, &alist);
+	if (!moveon)
+		return false;
+	moveon = xfs_action_list_process(ctx, ctx->mnt.fd, &alist,
+			ALP_COMPLAIN_IF_UNFIXED | ALP_NOPROGRESS);
+	if (!moveon)
+		return moveon;
+
 	/* Flush everything out to disk before we start counting. */
 	error = syncfs(ctx->mnt.fd);
 	if (error) {
diff --git a/scrub/repair.c b/scrub/repair.c
index 45450d8c..54639752 100644
--- a/scrub/repair.c
+++ b/scrub/repair.c
@@ -84,6 +84,9 @@ xfs_action_item_priority(
 	case XFS_SCRUB_TYPE_GQUOTA:
 	case XFS_SCRUB_TYPE_PQUOTA:
 		return PRIO(aitem, XFS_SCRUB_TYPE_UQUOTA);
+	case XFS_SCRUB_TYPE_FSCOUNTERS:
+		/* This should always go after AG headers no matter what. */
+		return PRIO(aitem, INT_MAX);
 	}
 	abort();
 }
diff --git a/scrub/scrub.c b/scrub/scrub.c
index 136ed529..a428b524 100644
--- a/scrub/scrub.c
+++ b/scrub/scrub.c
@@ -28,6 +28,7 @@ enum scrub_type {
 	ST_PERAG,	/* per-AG metadata */
 	ST_FS,		/* per-FS metadata */
 	ST_INODE,	/* per-inode metadata */
+	ST_SUMMARY,	/* summary counters (phase 7) */
 };
 struct scrub_descr {
 	const char	*name;
@@ -84,6 +85,8 @@ static const struct scrub_descr scrubbers[XFS_SCRUB_TYPE_NR] = {
 		{"group quotas",			ST_FS},
 	[XFS_SCRUB_TYPE_PQUOTA] =
 		{"project quotas",			ST_FS},
+	[XFS_SCRUB_TYPE_FSCOUNTERS] =
+		{"filesystem summary counters",		ST_SUMMARY},
 };
 
 /* Format a scrub description. */
@@ -105,6 +108,7 @@ format_scrub_descr(
 				(uint64_t)meta->sm_ino, _(sc->name));
 		break;
 	case ST_FS:
+	case ST_SUMMARY:
 		snprintf(buf, buflen, _("%s"), _(sc->name));
 		break;
 	case ST_NONE:
@@ -446,6 +450,15 @@ xfs_scrub_fs_metadata(
 	return xfs_scrub_metadata(ctx, ST_FS, 0, alist);
 }
 
+/* Scrub FS summary metadata. */
+bool
+xfs_scrub_fs_summary(
+	struct scrub_ctx		*ctx,
+	struct xfs_action_list		*alist)
+{
+	return xfs_scrub_metadata(ctx, ST_SUMMARY, 0, alist);
+}
+
 /* How many items do we have to check? */
 unsigned int
 xfs_scrub_estimate_ag_work(
diff --git a/scrub/scrub.h b/scrub/scrub.h
index e6e3f16f..449c43de 100644
--- a/scrub/scrub.h
+++ b/scrub/scrub.h
@@ -25,6 +25,8 @@ bool xfs_scrub_ag_metadata(struct scrub_ctx *ctx, xfs_agnumber_t agno,
 		struct xfs_action_list *alist);
 bool xfs_scrub_fs_metadata(struct scrub_ctx *ctx,
 		struct xfs_action_list *alist);
+bool xfs_scrub_fs_summary(struct scrub_ctx *ctx,
+		struct xfs_action_list *alist);
 
 bool xfs_can_scrub_fs_metadata(struct scrub_ctx *ctx);
 bool xfs_can_scrub_inode(struct scrub_ctx *ctx);


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] xfs_io: add online scrub/repair for superblock counters
  2019-08-26 21:21 ` [PATCH 1/2] xfs_io: add online scrub/repair for superblock counters Darrick J. Wong
@ 2019-08-27  5:09   ` Dave Chinner
  0 siblings, 0 replies; 6+ messages in thread
From: Dave Chinner @ 2019-08-27  5:09 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Mon, Aug 26, 2019 at 02:21:03PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Wire up the xfs_io scrub/repair commands to the new superblock summary
> counter ioctls.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Looks fine.

Reviewed-by: Dave Chinner <dchinner@redhat.com>

-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] xfs_scrub: check summary counters
  2019-08-26 21:21 ` [PATCH 2/2] xfs_scrub: check summary counters Darrick J. Wong
@ 2019-08-27  5:27   ` Dave Chinner
  2019-08-29  3:15     ` Darrick J. Wong
  0 siblings, 1 reply; 6+ messages in thread
From: Dave Chinner @ 2019-08-27  5:27 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Mon, Aug 26, 2019 at 02:21:09PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Teach scrub to ask the kernel to check and repair summary counters
> during phase 7.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  scrub/phase4.c |   12 ++++++++++++
>  scrub/phase7.c |   14 ++++++++++++++
>  scrub/repair.c |    3 +++
>  scrub/scrub.c  |   13 +++++++++++++
>  scrub/scrub.h  |    2 ++
>  5 files changed, 44 insertions(+)
> 
> 
> diff --git a/scrub/phase4.c b/scrub/phase4.c
> index 49f00723..c4da4852 100644
> --- a/scrub/phase4.c
> +++ b/scrub/phase4.c
> @@ -107,6 +107,18 @@ bool
>  xfs_repair_fs(
>  	struct scrub_ctx		*ctx)
>  {
> +	bool				moveon;
> +
> +	/*
> +	 * Check the summary counters early.  Normally we do this during phase
> +	 * seven, but some of the cross-referencing requires fairly-accurate
> +	 * counters, so counter repairs have to be put on the list now so that
> +	 * they get fixed before we stop retrying unfixed metadata repairs.
> +	 */
> +	moveon = xfs_scrub_fs_summary(ctx, &ctx->action_lists[0]);
> +	if (!moveon)
> +		return false;

"moveon" doesn't really make sense to me here. i.e. I can't tell if
"moveon = true" meant it failed or not, so I hav eno idea what the
intent of the code here is, and the comment doesn't explain it at
all, either.

> +
>  	return xfs_process_action_items(ctx);
>  }
>  
> diff --git a/scrub/phase7.c b/scrub/phase7.c
> index 1c459dfc..b3156fdf 100644
> --- a/scrub/phase7.c
> +++ b/scrub/phase7.c
> @@ -7,12 +7,15 @@
>  #include <stdint.h>
>  #include <stdlib.h>
>  #include <sys/statvfs.h>
> +#include "list.h"
>  #include "path.h"
>  #include "ptvar.h"
>  #include "xfs_scrub.h"
>  #include "common.h"
> +#include "scrub.h"
>  #include "fscounters.h"
>  #include "spacemap.h"
> +#include "repair.h"
>  
>  /* Phase 7: Check summary counters. */
>  
> @@ -91,6 +94,7 @@ xfs_scan_summary(
>  	struct scrub_ctx	*ctx)
>  {
>  	struct summary_counts	totalcount = {0};
> +	struct xfs_action_list	alist;
>  	struct ptvar		*ptvar;
>  	unsigned long long	used_data;
>  	unsigned long long	used_rt;
> @@ -110,6 +114,16 @@ xfs_scan_summary(
>  	int			ip;
>  	int			error;
>  
> +	/* Check and fix the fs summary counters. */
> +	xfs_action_list_init(&alist);
> +	moveon = xfs_scrub_fs_summary(ctx, &alist);
> +	if (!moveon)
> +		return false;
> +	moveon = xfs_action_list_process(ctx, ctx->mnt.fd, &alist,
> +			ALP_COMPLAIN_IF_UNFIXED | ALP_NOPROGRESS);
> +	if (!moveon)
> +		return moveon;

same here - "moveon" doesn't tell me if we're returning because the
scrub failed or passed....

> +
>  	/* Flush everything out to disk before we start counting. */
>  	error = syncfs(ctx->mnt.fd);
>  	if (error) {
> diff --git a/scrub/repair.c b/scrub/repair.c
> index 45450d8c..54639752 100644
> --- a/scrub/repair.c
> +++ b/scrub/repair.c
> @@ -84,6 +84,9 @@ xfs_action_item_priority(
>  	case XFS_SCRUB_TYPE_GQUOTA:
>  	case XFS_SCRUB_TYPE_PQUOTA:
>  		return PRIO(aitem, XFS_SCRUB_TYPE_UQUOTA);
> +	case XFS_SCRUB_TYPE_FSCOUNTERS:
> +		/* This should always go after AG headers no matter what. */
> +		return PRIO(aitem, INT_MAX);
>  	}
>  	abort();
>  }
> diff --git a/scrub/scrub.c b/scrub/scrub.c
> index 136ed529..a428b524 100644
> --- a/scrub/scrub.c
> +++ b/scrub/scrub.c
> @@ -28,6 +28,7 @@ enum scrub_type {
>  	ST_PERAG,	/* per-AG metadata */
>  	ST_FS,		/* per-FS metadata */
>  	ST_INODE,	/* per-inode metadata */
> +	ST_SUMMARY,	/* summary counters (phase 7) */
>  };

Hmmm - the previous patch used ST_FS for the summary counters.

Oh, wait, io/scrub.c has a duplicate scrub_type enum defined, and
the table looks largely the same, too. Except now the summary type
is different.

/me looks a bit closer...

Oh, the enum scrub_type definitions shadow the kernel enum
xchk_type, but have different values for the same names. I'm
just confused now...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] xfs_scrub: check summary counters
  2019-08-27  5:27   ` Dave Chinner
@ 2019-08-29  3:15     ` Darrick J. Wong
  0 siblings, 0 replies; 6+ messages in thread
From: Darrick J. Wong @ 2019-08-29  3:15 UTC (permalink / raw)
  To: Dave Chinner; +Cc: sandeen, linux-xfs

On Tue, Aug 27, 2019 at 03:27:26PM +1000, Dave Chinner wrote:
> On Mon, Aug 26, 2019 at 02:21:09PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Teach scrub to ask the kernel to check and repair summary counters
> > during phase 7.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  scrub/phase4.c |   12 ++++++++++++
> >  scrub/phase7.c |   14 ++++++++++++++
> >  scrub/repair.c |    3 +++
> >  scrub/scrub.c  |   13 +++++++++++++
> >  scrub/scrub.h  |    2 ++
> >  5 files changed, 44 insertions(+)
> > 
> > 
> > diff --git a/scrub/phase4.c b/scrub/phase4.c
> > index 49f00723..c4da4852 100644
> > --- a/scrub/phase4.c
> > +++ b/scrub/phase4.c
> > @@ -107,6 +107,18 @@ bool
> >  xfs_repair_fs(
> >  	struct scrub_ctx		*ctx)
> >  {
> > +	bool				moveon;
> > +
> > +	/*
> > +	 * Check the summary counters early.  Normally we do this during phase
> > +	 * seven, but some of the cross-referencing requires fairly-accurate
> > +	 * counters, so counter repairs have to be put on the list now so that
> > +	 * they get fixed before we stop retrying unfixed metadata repairs.
> > +	 */
> > +	moveon = xfs_scrub_fs_summary(ctx, &ctx->action_lists[0]);
> > +	if (!moveon)
> > +		return false;
> 
> "moveon" doesn't really make sense to me here. i.e. I can't tell if
> "moveon = true" meant it failed or not, so I hav eno idea what the
> intent of the code here is, and the comment doesn't explain it at
> all, either.

FWIW I created Yet Another Cleanup Series that replaces all the moveon
things with regular old "returns 0 for success, nonzero for error GTFO"
semantics.  I'll tack that on the end of all the stuff I've sent so far.

--D

> > +
> >  	return xfs_process_action_items(ctx);
> >  }
> >  
> > diff --git a/scrub/phase7.c b/scrub/phase7.c
> > index 1c459dfc..b3156fdf 100644
> > --- a/scrub/phase7.c
> > +++ b/scrub/phase7.c
> > @@ -7,12 +7,15 @@
> >  #include <stdint.h>
> >  #include <stdlib.h>
> >  #include <sys/statvfs.h>
> > +#include "list.h"
> >  #include "path.h"
> >  #include "ptvar.h"
> >  #include "xfs_scrub.h"
> >  #include "common.h"
> > +#include "scrub.h"
> >  #include "fscounters.h"
> >  #include "spacemap.h"
> > +#include "repair.h"
> >  
> >  /* Phase 7: Check summary counters. */
> >  
> > @@ -91,6 +94,7 @@ xfs_scan_summary(
> >  	struct scrub_ctx	*ctx)
> >  {
> >  	struct summary_counts	totalcount = {0};
> > +	struct xfs_action_list	alist;
> >  	struct ptvar		*ptvar;
> >  	unsigned long long	used_data;
> >  	unsigned long long	used_rt;
> > @@ -110,6 +114,16 @@ xfs_scan_summary(
> >  	int			ip;
> >  	int			error;
> >  
> > +	/* Check and fix the fs summary counters. */
> > +	xfs_action_list_init(&alist);
> > +	moveon = xfs_scrub_fs_summary(ctx, &alist);
> > +	if (!moveon)
> > +		return false;
> > +	moveon = xfs_action_list_process(ctx, ctx->mnt.fd, &alist,
> > +			ALP_COMPLAIN_IF_UNFIXED | ALP_NOPROGRESS);
> > +	if (!moveon)
> > +		return moveon;
> 
> same here - "moveon" doesn't tell me if we're returning because the
> scrub failed or passed....
> 
> > +
> >  	/* Flush everything out to disk before we start counting. */
> >  	error = syncfs(ctx->mnt.fd);
> >  	if (error) {
> > diff --git a/scrub/repair.c b/scrub/repair.c
> > index 45450d8c..54639752 100644
> > --- a/scrub/repair.c
> > +++ b/scrub/repair.c
> > @@ -84,6 +84,9 @@ xfs_action_item_priority(
> >  	case XFS_SCRUB_TYPE_GQUOTA:
> >  	case XFS_SCRUB_TYPE_PQUOTA:
> >  		return PRIO(aitem, XFS_SCRUB_TYPE_UQUOTA);
> > +	case XFS_SCRUB_TYPE_FSCOUNTERS:
> > +		/* This should always go after AG headers no matter what. */
> > +		return PRIO(aitem, INT_MAX);
> >  	}
> >  	abort();
> >  }
> > diff --git a/scrub/scrub.c b/scrub/scrub.c
> > index 136ed529..a428b524 100644
> > --- a/scrub/scrub.c
> > +++ b/scrub/scrub.c
> > @@ -28,6 +28,7 @@ enum scrub_type {
> >  	ST_PERAG,	/* per-AG metadata */
> >  	ST_FS,		/* per-FS metadata */
> >  	ST_INODE,	/* per-inode metadata */
> > +	ST_SUMMARY,	/* summary counters (phase 7) */
> >  };
> 
> Hmmm - the previous patch used ST_FS for the summary counters.
> 
> Oh, wait, io/scrub.c has a duplicate scrub_type enum defined, and
> the table looks largely the same, too. Except now the summary type
> is different.
> 
> /me looks a bit closer...
> 
> Oh, the enum scrub_type definitions shadow the kernel enum
> xchk_type, but have different values for the same names. I'm
> just confused now...
> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-08-29  3:16 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-26 21:20 [PATCH 0/2] xfsprogs: scrub filesystem summary counters Darrick J. Wong
2019-08-26 21:21 ` [PATCH 1/2] xfs_io: add online scrub/repair for superblock counters Darrick J. Wong
2019-08-27  5:09   ` Dave Chinner
2019-08-26 21:21 ` [PATCH 2/2] xfs_scrub: check summary counters Darrick J. Wong
2019-08-27  5:27   ` Dave Chinner
2019-08-29  3:15     ` Darrick J. Wong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.