All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/4] xfs: scrub/repair update health tracking
@ 2019-04-11  1:46 Darrick J. Wong
  2019-04-11  1:46 ` [PATCH 1/4] xfs: refactor scrub context initialization Darrick J. Wong
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Darrick J. Wong @ 2019-04-11  1:46 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

Hi all,

This series enhances the online scrub and repair code to report their
findings to the health tracking subsystem.  For now, only scrub gets to
record corruption problems, though in principle a metadata read
encountering corruption could also set a sick flag.

Online repair will clear the appropriate sick flags when metadata passes
its inspection after a repair attempt.

On the userspace side of things, xfs_scrub will be adapted to give a
clean bill of health to the kernel when it is warranted.

The first three patches rework the scrub context management code to
remove some clunkiness when we decide that we have to start the scrub
over.

Patch #4 actually teaches scrub to update the health subsystem.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=scrub-health-tracking

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/4] xfs: refactor scrub context initialization
  2019-04-11  1:46 [PATCH v2 0/4] xfs: scrub/repair update health tracking Darrick J. Wong
@ 2019-04-11  1:46 ` Darrick J. Wong
  2019-04-11  1:46 ` [PATCH 2/4] xfs: collapse scrub bool state flags into a single unsigned int Darrick J. Wong
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Darrick J. Wong @ 2019-04-11  1:46 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

It's a little silly how the memset in scrub context initialization
forces us to declare stack variables to preserve context variables
across a retry.  Since the teardown functions already null out most of
the ephemeral state (buffer pointers, btree cursors, etc.), just skip
the memset and move the initialization as needed.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/scrub/scrub.c |   31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)


diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 1b2344d00525..08df00911dd3 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -186,8 +186,10 @@ xchk_teardown(
 			xfs_irele(sc->ip);
 		sc->ip = NULL;
 	}
-	if (sc->has_quotaofflock)
+	if (sc->has_quotaofflock) {
 		mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
+		sc->has_quotaofflock = false;
+	}
 	if (sc->buf) {
 		kmem_free(sc->buf);
 		sc->buf = NULL;
@@ -466,9 +468,14 @@ xfs_scrub_metadata(
 	struct xfs_inode		*ip,
 	struct xfs_scrub_metadata	*sm)
 {
-	struct xfs_scrub		sc;
+	struct xfs_scrub		sc = {
+		.mp			= ip->i_mount,
+		.sm			= sm,
+		.sa			= {
+			.agno		= NULLAGNUMBER,
+		},
+	};
 	struct xfs_mount		*mp = ip->i_mount;
-	bool				try_harder = false;
 	bool				already_fixed = false;
 	int				error = 0;
 
@@ -491,21 +498,16 @@ xfs_scrub_metadata(
 
 	xchk_experimental_warning(mp);
 
+	sc.ops = &meta_scrub_ops[sm->sm_type];
 retry_op:
 	/* Set up for the operation. */
-	memset(&sc, 0, sizeof(sc));
-	sc.mp = ip->i_mount;
-	sc.sm = sm;
-	sc.ops = &meta_scrub_ops[sm->sm_type];
-	sc.try_harder = try_harder;
-	sc.sa.agno = NULLAGNUMBER;
 	error = sc.ops->setup(&sc, ip);
 	if (error)
 		goto out_teardown;
 
 	/* Scrub for errors. */
 	error = sc.ops->scrub(&sc);
-	if (!try_harder && error == -EDEADLOCK) {
+	if (!sc.try_harder && error == -EDEADLOCK) {
 		/*
 		 * Scrubbers return -EDEADLOCK to mean 'try harder'.
 		 * Tear down everything we hold, then set up again with
@@ -514,7 +516,7 @@ xfs_scrub_metadata(
 		error = xchk_teardown(&sc, ip, 0);
 		if (error)
 			goto out;
-		try_harder = true;
+		sc.try_harder = true;
 		goto retry_op;
 	} else if (error)
 		goto out_teardown;
@@ -544,8 +546,11 @@ xfs_scrub_metadata(
 		 */
 		error = xrep_attempt(ip, &sc, &already_fixed);
 		if (error == -EAGAIN) {
-			if (sc.try_harder)
-				try_harder = true;
+			/*
+			 * Either the repair function succeeded or it couldn't
+			 * get all the resources it needs; either way, we go
+			 * back to the beginning and call the scrub function.
+			 */
 			error = xchk_teardown(&sc, ip, 0);
 			if (error) {
 				xrep_failure(mp);

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/4] xfs: collapse scrub bool state flags into a single unsigned int
  2019-04-11  1:46 [PATCH v2 0/4] xfs: scrub/repair update health tracking Darrick J. Wong
  2019-04-11  1:46 ` [PATCH 1/4] xfs: refactor scrub context initialization Darrick J. Wong
@ 2019-04-11  1:46 ` Darrick J. Wong
  2019-04-11  1:46 ` [PATCH 3/4] xfs: hoist the already_fixed variable to the scrub context Darrick J. Wong
  2019-04-11  1:46 ` [PATCH 4/4] xfs: scrub/repair should update filesystem metadata health Darrick J. Wong
  3 siblings, 0 replies; 5+ messages in thread
From: Darrick J. Wong @ 2019-04-11  1:46 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Combine all the boolean state flags in struct xfs_scrub into a single
unsigned int, because we're going to be adding more state flags soon.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/scrub/ialloc.c |    4 ++--
 fs/xfs/scrub/parent.c |    2 +-
 fs/xfs/scrub/quota.c  |    2 +-
 fs/xfs/scrub/repair.c |    4 ++--
 fs/xfs/scrub/scrub.c  |    8 ++++----
 fs/xfs/scrub/scrub.h  |    9 +++++++--
 6 files changed, 17 insertions(+), 12 deletions(-)


diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 700114f79a7d..693eb51f5efb 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -39,7 +39,7 @@ xchk_setup_ag_iallocbt(
 	struct xfs_scrub	*sc,
 	struct xfs_inode	*ip)
 {
-	return xchk_setup_ag_btree(sc, ip, sc->try_harder);
+	return xchk_setup_ag_btree(sc, ip, sc->flags & XCHK_TRY_HARDER);
 }
 
 /* Inode btree scrubber. */
@@ -185,7 +185,7 @@ xchk_iallocbt_check_cluster_ifree(
 	if (error == -ENODATA) {
 		/* Not cached, just read the disk buffer */
 		freemask_ok = irec_free ^ !!(dip->di_mode);
-		if (!bs->sc->try_harder && !freemask_ok)
+		if (!(bs->sc->flags & XCHK_TRY_HARDER) && !freemask_ok)
 			return -EDEADLOCK;
 	} else if (error < 0) {
 		/*
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 1c9d7c7f64f5..d5d197f1b80f 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -320,7 +320,7 @@ xchk_parent(
 	 * If we failed to lock the parent inode even after a retry, just mark
 	 * this scrub incomplete and return.
 	 */
-	if (sc->try_harder && error == -EDEADLOCK) {
+	if ((sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
 		error = 0;
 		xchk_set_incomplete(sc);
 	}
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 782d582d3edd..5dfe2b5924db 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -60,7 +60,7 @@ xchk_setup_quota(
 	dqtype = xchk_quota_to_dqtype(sc);
 	if (dqtype == 0)
 		return -EINVAL;
-	sc->has_quotaofflock = true;
+	sc->flags |= XCHK_HAS_QUOTAOFFLOCK;
 	mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock);
 	if (!xfs_this_quota_on(sc->mp, dqtype))
 		return -ENOENT;
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index f28f4bad317b..c093939fe35a 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -71,8 +71,8 @@ xrep_attempt(
 	case -EDEADLOCK:
 	case -EAGAIN:
 		/* Tell the caller to try again having grabbed all the locks. */
-		if (!sc->try_harder) {
-			sc->try_harder = true;
+		if (!(sc->flags & XCHK_TRY_HARDER)) {
+			sc->flags |= XCHK_TRY_HARDER;
 			return -EAGAIN;
 		}
 		/*
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 08df00911dd3..6e18a1178e26 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -186,9 +186,9 @@ xchk_teardown(
 			xfs_irele(sc->ip);
 		sc->ip = NULL;
 	}
-	if (sc->has_quotaofflock) {
+	if (sc->flags & XCHK_HAS_QUOTAOFFLOCK) {
 		mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
-		sc->has_quotaofflock = false;
+		sc->flags &= ~XCHK_HAS_QUOTAOFFLOCK;
 	}
 	if (sc->buf) {
 		kmem_free(sc->buf);
@@ -507,7 +507,7 @@ xfs_scrub_metadata(
 
 	/* Scrub for errors. */
 	error = sc.ops->scrub(&sc);
-	if (!sc.try_harder && error == -EDEADLOCK) {
+	if (!(sc.flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
 		/*
 		 * Scrubbers return -EDEADLOCK to mean 'try harder'.
 		 * Tear down everything we hold, then set up again with
@@ -516,7 +516,7 @@ xfs_scrub_metadata(
 		error = xchk_teardown(&sc, ip, 0);
 		if (error)
 			goto out;
-		sc.try_harder = true;
+		sc.flags |= XCHK_TRY_HARDER;
 		goto retry_op;
 	} else if (error)
 		goto out_teardown;
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 22f754fba8e5..60359e7de930 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -62,13 +62,18 @@ struct xfs_scrub {
 	struct xfs_inode		*ip;
 	void				*buf;
 	uint				ilock_flags;
-	bool				try_harder;
-	bool				has_quotaofflock;
+
+	/* See the XCHK state flags below. */
+	unsigned int			flags;
 
 	/* State tracking for single-AG operations. */
 	struct xchk_ag			sa;
 };
 
+/* XCHK state flags */
+#define XCHK_TRY_HARDER		(1 << 0)  /* can't get resources, try again */
+#define XCHK_HAS_QUOTAOFFLOCK	(1 << 1)  /* we hold the quotaoff lock */
+
 /* Metadata scrubbers */
 int xchk_tester(struct xfs_scrub *sc);
 int xchk_superblock(struct xfs_scrub *sc);

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/4] xfs: hoist the already_fixed variable to the scrub context
  2019-04-11  1:46 [PATCH v2 0/4] xfs: scrub/repair update health tracking Darrick J. Wong
  2019-04-11  1:46 ` [PATCH 1/4] xfs: refactor scrub context initialization Darrick J. Wong
  2019-04-11  1:46 ` [PATCH 2/4] xfs: collapse scrub bool state flags into a single unsigned int Darrick J. Wong
@ 2019-04-11  1:46 ` Darrick J. Wong
  2019-04-11  1:46 ` [PATCH 4/4] xfs: scrub/repair should update filesystem metadata health Darrick J. Wong
  3 siblings, 0 replies; 5+ messages in thread
From: Darrick J. Wong @ 2019-04-11  1:46 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Now that we no longer memset the scrub context, we can move the
already_fixed variable into the scrub context's state flags instead of
passing around pointers to separate stack variables.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/scrub/repair.c |    5 ++---
 fs/xfs/scrub/repair.h |    5 ++---
 fs/xfs/scrub/scrub.c  |    6 +++---
 fs/xfs/scrub/scrub.h  |    5 +++--
 4 files changed, 10 insertions(+), 11 deletions(-)


diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index c093939fe35a..5e7e36cdf3d5 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -46,8 +46,7 @@
 int
 xrep_attempt(
 	struct xfs_inode	*ip,
-	struct xfs_scrub	*sc,
-	bool			*fixed)
+	struct xfs_scrub	*sc)
 {
 	int			error = 0;
 
@@ -66,7 +65,7 @@ xrep_attempt(
 		 * scrub so that we can tell userspace if we fixed the problem.
 		 */
 		sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
-		*fixed = true;
+		sc->flags |= XREP_ALREADY_FIXED;
 		return -EAGAIN;
 	case -EDEADLOCK:
 	case -EAGAIN:
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index d990314eb08b..60c61d7052a8 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -15,7 +15,7 @@ static inline int xrep_notsupported(struct xfs_scrub *sc)
 
 /* Repair helpers */
 
-int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub *sc, bool *fixed);
+int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub *sc);
 void xrep_failure(struct xfs_mount *mp);
 int xrep_roll_ag_trans(struct xfs_scrub *sc);
 bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
@@ -64,8 +64,7 @@ int xrep_agi(struct xfs_scrub *sc);
 
 static inline int xrep_attempt(
 	struct xfs_inode	*ip,
-	struct xfs_scrub	*sc,
-	bool			*fixed)
+	struct xfs_scrub	*sc)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 6e18a1178e26..02d278b7d20b 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -476,7 +476,6 @@ xfs_scrub_metadata(
 		},
 	};
 	struct xfs_mount		*mp = ip->i_mount;
-	bool				already_fixed = false;
 	int				error = 0;
 
 	BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
@@ -521,7 +520,8 @@ xfs_scrub_metadata(
 	} else if (error)
 		goto out_teardown;
 
-	if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && !already_fixed) {
+	if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
+	    !(sc.flags & XREP_ALREADY_FIXED)) {
 		bool needs_fix;
 
 		/* Let debug users force us into the repair routines. */
@@ -544,7 +544,7 @@ xfs_scrub_metadata(
 		 * If it's broken, userspace wants us to fix it, and we haven't
 		 * already tried to fix it, then attempt a repair.
 		 */
-		error = xrep_attempt(ip, &sc, &already_fixed);
+		error = xrep_attempt(ip, &sc);
 		if (error == -EAGAIN) {
 			/*
 			 * Either the repair function succeeded or it couldn't
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 60359e7de930..1b23bf141438 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -63,16 +63,17 @@ struct xfs_scrub {
 	void				*buf;
 	uint				ilock_flags;
 
-	/* See the XCHK state flags below. */
+	/* See the XCHK/XREP state flags below. */
 	unsigned int			flags;
 
 	/* State tracking for single-AG operations. */
 	struct xchk_ag			sa;
 };
 
-/* XCHK state flags */
+/* XCHK/XREP state flags */
 #define XCHK_TRY_HARDER		(1 << 0)  /* can't get resources, try again */
 #define XCHK_HAS_QUOTAOFFLOCK	(1 << 1)  /* we hold the quotaoff lock */
+#define XREP_ALREADY_FIXED	(1 << 31) /* checking our repair work */
 
 /* Metadata scrubbers */
 int xchk_tester(struct xfs_scrub *sc);

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 4/4] xfs: scrub/repair should update filesystem metadata health
  2019-04-11  1:46 [PATCH v2 0/4] xfs: scrub/repair update health tracking Darrick J. Wong
                   ` (2 preceding siblings ...)
  2019-04-11  1:46 ` [PATCH 3/4] xfs: hoist the already_fixed variable to the scrub context Darrick J. Wong
@ 2019-04-11  1:46 ` Darrick J. Wong
  3 siblings, 0 replies; 5+ messages in thread
From: Darrick J. Wong @ 2019-04-11  1:46 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Now that we have the ability to track sick metadata in-core, make scrub
and repair update those health assessments after doing work.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/Makefile       |    1 
 fs/xfs/scrub/health.c |  172 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/health.h |   12 +++
 fs/xfs/scrub/scrub.c  |    4 +
 fs/xfs/scrub/scrub.h  |    7 ++
 5 files changed, 196 insertions(+)
 create mode 100644 fs/xfs/scrub/health.c
 create mode 100644 fs/xfs/scrub/health.h


diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 786379c143f4..b20964e26a22 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -143,6 +143,7 @@ xfs-y				+= $(addprefix scrub/, \
 				   common.o \
 				   dabtree.o \
 				   dir.o \
+				   health.o \
 				   ialloc.o \
 				   inode.o \
 				   parent.o \
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
new file mode 100644
index 000000000000..9738e4d5f0b2
--- /dev/null
+++ b/fs/xfs/scrub/health.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_health.h"
+#include "scrub/scrub.h"
+#include "scrub/health.h"
+
+/*
+ * Scrub and In-Core Filesystem Health Assessments
+ * ===============================================
+ *
+ * Online scrub and repair have the time and the ability to perform stronger
+ * checks than we can do from the metadata verifiers, because they can
+ * cross-reference records between data structures.  Therefore, scrub is in a
+ * good position to update the online filesystem health assessments to reflect
+ * the good/bad state of the data structure.
+ *
+ * We therefore extend scrub in the following ways to achieve this:
+ *
+ * 1. Create a "sick_mask_update" field in the scrub context.  When we're
+ * setting up a scrub call, set this to the default XFS_SICK_* flag(s) for the
+ * selected scrub type (call it A).  Scrub and repair functions can override
+ * the default sick_mask_update value if they choose.
+ *
+ * 2. If the scrubber returns a runtime error code, we exit making no changes
+ * to the incore sick state.
+ *
+ * 3. If the scrubber finds that A is clean, use sick_mask_update to clear the
+ * incore sick flags before exiting.
+ *
+ * 4. If the scrubber finds that A is corrupt, use sick_mask_update to set the
+ * incore sick flags.  If the user didn't want to repair then we exit, leaving
+ * the metadata structure unfixed and the sick flag set.
+ *
+ * 5. Now we know that A is corrupt and the user wants to repair, so run the
+ * repairer.  If the repairer returns an error code, we exit with that error
+ * code, having made no further changes to the incore sick state.
+ *
+ * 6. If repair rebuilds A correctly and the subsequent re-scrub of A is
+ * clean, use sick_mask_update to clear the incore sick flags.  This should
+ * have the effect that A is no longer marked sick.
+ *
+ * 7. If repair rebuilds A incorrectly, the re-scrub will find it corrupt and
+ * use sick_mask_update to set the incore sick flags.  This should have no
+ * externally visible effect since we already set them in step (4).
+ *
+ * There are some complications to this story, however.  For certain types of
+ * complementary metadata indices (e.g. inobt/finobt), it is easier to rebuild
+ * both structures at the same time.  The following principles apply to this
+ * type of repair strategy:
+ *
+ * 8. Any repair function that rebuilds multiple structures should update
+ * sick_mask_visible to reflect whatever other structures are rebuilt, and
+ * verify that all the rebuilt structures can pass a scrub check.  The
+ * outcomes of 5-7 still apply, but with a sick_mask_update that covers
+ * everything being rebuilt.
+ */
+
+/* Map our scrub type to a sick mask and a set of health update functions. */
+
+enum xchk_health_group {
+	XHG_FS = 1,
+	XHG_RT,
+	XHG_AG,
+	XHG_INO,
+};
+
+struct xchk_health_map {
+	enum xchk_health_group	group;
+	unsigned int		sick_mask;
+};
+
+static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
+	[XFS_SCRUB_TYPE_SB]		= { XHG_AG,  XFS_SICK_AG_SB },
+	[XFS_SCRUB_TYPE_AGF]		= { XHG_AG,  XFS_SICK_AG_AGF },
+	[XFS_SCRUB_TYPE_AGFL]		= { XHG_AG,  XFS_SICK_AG_AGFL },
+	[XFS_SCRUB_TYPE_AGI]		= { XHG_AG,  XFS_SICK_AG_AGI },
+	[XFS_SCRUB_TYPE_BNOBT]		= { XHG_AG,  XFS_SICK_AG_BNOBT },
+	[XFS_SCRUB_TYPE_CNTBT]		= { XHG_AG,  XFS_SICK_AG_CNTBT },
+	[XFS_SCRUB_TYPE_INOBT]		= { XHG_AG,  XFS_SICK_AG_INOBT },
+	[XFS_SCRUB_TYPE_FINOBT]		= { XHG_AG,  XFS_SICK_AG_FINOBT },
+	[XFS_SCRUB_TYPE_RMAPBT]		= { XHG_AG,  XFS_SICK_AG_RMAPBT },
+	[XFS_SCRUB_TYPE_REFCNTBT]	= { XHG_AG,  XFS_SICK_AG_REFCNTBT },
+	[XFS_SCRUB_TYPE_INODE]		= { XHG_INO, XFS_SICK_INO_CORE },
+	[XFS_SCRUB_TYPE_BMBTD]		= { XHG_INO, XFS_SICK_INO_BMBTD },
+	[XFS_SCRUB_TYPE_BMBTA]		= { XHG_INO, XFS_SICK_INO_BMBTA },
+	[XFS_SCRUB_TYPE_BMBTC]		= { XHG_INO, XFS_SICK_INO_BMBTC },
+	[XFS_SCRUB_TYPE_DIR]		= { XHG_INO, XFS_SICK_INO_DIR },
+	[XFS_SCRUB_TYPE_XATTR]		= { XHG_INO, XFS_SICK_INO_XATTR },
+	[XFS_SCRUB_TYPE_SYMLINK]	= { XHG_INO, XFS_SICK_INO_SYMLINK },
+	[XFS_SCRUB_TYPE_PARENT]		= { XHG_INO, XFS_SICK_INO_PARENT },
+	[XFS_SCRUB_TYPE_RTBITMAP]	= { XHG_RT,  XFS_SICK_RT_BITMAP },
+	[XFS_SCRUB_TYPE_RTSUM]		= { XHG_RT,  XFS_SICK_RT_SUMMARY },
+	[XFS_SCRUB_TYPE_UQUOTA]		= { XHG_FS,  XFS_SICK_FS_UQUOTA },
+	[XFS_SCRUB_TYPE_GQUOTA]		= { XHG_FS,  XFS_SICK_FS_GQUOTA },
+	[XFS_SCRUB_TYPE_PQUOTA]		= { XHG_FS,  XFS_SICK_FS_PQUOTA },
+};
+
+/* Return the health status mask for this scrub type. */
+unsigned int
+xchk_health_mask_for_scrub_type(
+	__u32			scrub_type)
+{
+	return type_to_health_flag[scrub_type].sick_mask;
+}
+
+/*
+ * Update filesystem health assessments based on what we found and did.
+ *
+ * If the scrubber finds errors, we mark sick whatever's mentioned in
+ * sick_mask_update, no matter whether this is a first scan or an
+ * evaluation of repair effectiveness.
+ *
+ * Otherwise, no direct corruption was found, so mark whatever's in
+ * sick_mask_update as healthy.
+ */
+void
+xchk_update_health(
+	struct xfs_scrub	*sc)
+{
+	struct xfs_perag	*pag;
+
+	if (!sc->sick_mask_update)
+		return;
+
+	switch (type_to_health_flag[sc->sm->sm_type].group) {
+	case XHG_AG:
+		pag = xfs_perag_get(sc->mp, sc->sm->sm_agno);
+		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+			xfs_ag_mark_sick(pag, sc->sick_mask_update);
+		else
+			xfs_ag_mark_healthy(pag, sc->sick_mask_update);
+		xfs_perag_put(pag);
+		break;
+	case XHG_INO:
+		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+			xfs_inode_mark_sick(sc->ip, sc->sick_mask_update);
+		else
+			xfs_inode_mark_healthy(sc->ip, sc->sick_mask_update);
+		break;
+	case XHG_FS:
+		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+			xfs_fs_mark_sick(sc->mp, sc->sick_mask_update);
+		else
+			xfs_fs_mark_healthy(sc->mp, sc->sick_mask_update);
+		break;
+	case XHG_RT:
+		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+			xfs_rt_mark_sick(sc->mp, sc->sick_mask_update);
+		else
+			xfs_rt_mark_healthy(sc->mp, sc->sick_mask_update);
+		break;
+	default:
+		ASSERT(0);
+		break;
+	}
+}
diff --git a/fs/xfs/scrub/health.h b/fs/xfs/scrub/health.h
new file mode 100644
index 000000000000..fd0d466c8658
--- /dev/null
+++ b/fs/xfs/scrub/health.h
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_SCRUB_HEALTH_H__
+#define __XFS_SCRUB_HEALTH_H__
+
+unsigned int xchk_health_mask_for_scrub_type(__u32 scrub_type);
+void xchk_update_health(struct xfs_scrub *sc);
+
+#endif /* __XFS_SCRUB_HEALTH_H__ */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 02d278b7d20b..01d5bfc1917c 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -40,6 +40,7 @@
 #include "scrub/trace.h"
 #include "scrub/btree.h"
 #include "scrub/repair.h"
+#include "scrub/health.h"
 
 /*
  * Online Scrub and Repair
@@ -498,6 +499,7 @@ xfs_scrub_metadata(
 	xchk_experimental_warning(mp);
 
 	sc.ops = &meta_scrub_ops[sm->sm_type];
+	sc.sick_mask_update = xchk_health_mask_for_scrub_type(sm->sm_type);
 retry_op:
 	/* Set up for the operation. */
 	error = sc.ops->setup(&sc, ip);
@@ -520,6 +522,8 @@ xfs_scrub_metadata(
 	} else if (error)
 		goto out_teardown;
 
+	xchk_update_health(&sc);
+
 	if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
 	    !(sc.flags & XREP_ALREADY_FIXED)) {
 		bool needs_fix;
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 1b23bf141438..b1d632f1c5ff 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -66,6 +66,13 @@ struct xfs_scrub {
 	/* See the XCHK/XREP state flags below. */
 	unsigned int			flags;
 
+	/*
+	 * The XFS_SICK_* flags that correspond to the metadata being scrubbed
+	 * or repaired.  We will use this mask to update the in-core fs health
+	 * status with whatever we find.
+	 */
+	unsigned int			sick_mask_update;
+
 	/* State tracking for single-AG operations. */
 	struct xchk_ag			sa;
 };

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-04-11  1:48 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-11  1:46 [PATCH v2 0/4] xfs: scrub/repair update health tracking Darrick J. Wong
2019-04-11  1:46 ` [PATCH 1/4] xfs: refactor scrub context initialization Darrick J. Wong
2019-04-11  1:46 ` [PATCH 2/4] xfs: collapse scrub bool state flags into a single unsigned int Darrick J. Wong
2019-04-11  1:46 ` [PATCH 3/4] xfs: hoist the already_fixed variable to the scrub context Darrick J. Wong
2019-04-11  1:46 ` [PATCH 4/4] xfs: scrub/repair should update filesystem metadata health Darrick J. Wong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.