From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 08/19] xfs: implement the metadata repair ioctl flag
Date: Fri, 25 Aug 2017 15:17:45 -0700 [thread overview]
Message-ID: <150369946542.9957.12755552154555796419.stgit@magnolia> (raw)
In-Reply-To: <150369940879.9957.6303798184036268321.stgit@magnolia>
From: Darrick J. Wong <darrick.wong@oracle.com>
Plumb in the pieces necessary to make the "repair" subfunction of
the scrub ioctl actually work.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Kconfig | 17 ++++++++
fs/xfs/scrub/scrub.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++---
fs/xfs/scrub/scrub.h | 10 +++++
fs/xfs/xfs_error.c | 3 +
fs/xfs/xfs_error.h | 4 +-
5 files changed, 133 insertions(+), 8 deletions(-)
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index f42fcf1..06be67d 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -88,6 +88,23 @@ config XFS_ONLINE_SCRUB
If unsure, say N.
+config XFS_ONLINE_REPAIR
+ bool "XFS online metadata repair support"
+ default n
+ depends on XFS_FS && XFS_ONLINE_SCRUB
+ help
+ If you say Y here you will be able to repair metadata on a
+ mounted XFS filesystem. This feature is intended to reduce
+ filesystem downtime even further by fixing minor problems
+ before they cause the filesystem to go down. However, it
+ requires that the filesystem be formatted with secondary
+ metadata, such as reverse mappings and inode parent pointers.
+
+ This feature is considered EXPERIMENTAL. Use with caution!
+
+ See the xfs_scrub man page in section 8 for additional information.
+
+ If unsure, say N.
config XFS_WARN
bool "XFS Verbose Warnings"
depends on XFS_FS && !XFS_DEBUG
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 5f2c71d..cdc8233 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -42,6 +42,9 @@
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
+#include "xfs_error.h"
+#include "xfs_log.h"
+#include "xfs_trans_priv.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -121,6 +124,24 @@
* XCORRUPT flag; btree query function errors are noted by setting the
* XFAIL flag and deleting the cursor to prevent further attempts to
* cross-reference with a defective btree.
+ *
+ * If a piece of metadata proves corrupt or suboptimal, the userspace
+ * program can ask the kernel to apply some tender loving care (TLC) to
+ * the metadata object by setting the REPAIR flag and re-calling the
+ * scrub ioctl. "Corruption" is defined by metadata violating the
+ * on-disk specification; operations cannot continue if the violation is
+ * left untreated. It is possible for XFS to continue if an object is
+ * "suboptimal"; however, performance may be degraded. Repairs are
+ * usually performed by rebuilding the metadata entirely out of
+ * redundant metadata. Optimizing, on the other hand, can sometimes be
+ * done without rebuilding entire structures.
+ *
+ * Generally speaking, the repair code has the following structure:
+ * Lock -> scrub -> repair -> commit -> re-lock -> re-scrub -> unlock.
+ * The first check helps us figure out if we need to rebuild or simply
+ * optimize the structure so that the rebuild knows what to do. The
+ * second check evaluates the completeness of the repair; that is what
+ * is reported to userspace.
*/
/*
@@ -162,7 +183,10 @@ xfs_scrub_teardown(
{
xfs_scrub_ag_free(sc, &sc->sa);
if (sc->tp) {
- xfs_trans_cancel(sc->tp);
+ if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
+ error = xfs_trans_commit(sc->tp);
+ else
+ xfs_trans_cancel(sc->tp);
sc->tp = NULL;
}
if (sc->ip) {
@@ -184,6 +208,7 @@ static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
{ /* ioctl presence test */
.setup = xfs_scrub_setup_fs,
.scrub = xfs_scrub_tester,
+ .repair = xfs_scrub_tester,
},
{ /* superblock */
.setup = xfs_scrub_setup_ag_header,
@@ -295,6 +320,18 @@ static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
#endif
};
+#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
+static inline bool xfs_scrub_can_repair(struct xfs_mount *mp)
+{
+ return xfs_sb_version_hascrc(&mp->m_sb); /* repair requires hascrc */
+}
+#else /* !CONFIG_XFS_ONLINE_REPAIR */
+static inline bool xfs_scrub_can_repair(struct xfs_mount *mp)
+{
+ return false; /* repair support is compiled out */
+}
+#endif /* CONFIG_XFS_ONLINE_REPAIR */
+
/* Dispatch metadata scrubbing. */
int
xfs_scrub_metadata(
@@ -304,7 +341,10 @@ xfs_scrub_metadata(
struct xfs_scrub_context sc;
struct xfs_mount *mp = ip->i_mount;
const struct xfs_scrub_meta_ops *ops;
+ char *errstr;
bool try_harder = false;
+ bool already_fixed = false;
+ bool was_corrupt = false;
int error = 0;
trace_xfs_scrub_start(ip, sm, error);
@@ -337,10 +377,17 @@ xfs_scrub_metadata(
if (ops->has && !ops->has(&mp->m_sb))
goto out;
- /* We don't know how to repair anything yet. */
- error = -EOPNOTSUPP;
- if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
- goto out;
+ /* Can we repair it? */
+ if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
+ /* Only allow repair for metadata we know how to fix. */
+ error = -EOPNOTSUPP;
+ if (!xfs_scrub_can_repair(mp) || ops->repair == NULL)
+ goto out;
+
+ error = -EROFS;
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ goto out;
+ }
/* This isn't a stable feature. Use with care. */
{
@@ -382,9 +429,55 @@ xfs_scrub_metadata(
} else if (error)
goto out_teardown;
+ /* Let debug users force us into the repair routines. */
+ if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && !already_fixed &&
+ XFS_TEST_ERROR(false, mp,
+ XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
+ sc.sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ }
+ if (!already_fixed)
+ was_corrupt = !!(sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+ XFS_SCRUB_OFLAG_XCORRUPT));
+
+ if (!already_fixed && xfs_scrub_should_fix(sc.sm)) {
+ xfs_scrub_ag_btcur_free(&sc.sa);
+
+ /* Ok, something's wrong. Repair it. */
+ trace_xfs_repair_attempt(ip, sc.sm, error);
+ error = sc.ops->repair(&sc);
+ trace_xfs_repair_done(ip, sc.sm, error);
+ if (!try_harder && error == -EDEADLOCK) {
+ error = xfs_scrub_teardown(&sc, ip, 0);
+ if (error)
+ goto out_dec;
+ try_harder = true;
+ goto retry_op;
+ } else if (error)
+ goto out_teardown;
+
+ /*
+ * Commit the fixes and perform a second dry-run scrub
+ * so that we can tell userspace if we fixed the problem.
+ */
+ error = xfs_scrub_teardown(&sc, ip, error);
+ if (error)
+ goto out_dec;
+ sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+ already_fixed = true;
+ goto retry_op;
+ }
+
if (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
- XFS_SCRUB_OFLAG_XCORRUPT))
- xfs_alert_ratelimited(mp, "Corruption detected during scrub.");
+ XFS_SCRUB_OFLAG_XCORRUPT)) {
+ if (sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+ errstr = "Corruption not fixed during online repair. "
+ "Unmount and run xfs_repair.";
+ else
+ errstr = "Corruption detected during scrub.";
+ xfs_alert_ratelimited(mp, errstr);
+ } else if (already_fixed && was_corrupt) {
+ xfs_alert_ratelimited(mp, "Corruption repaired during scrub.");
+ }
out_teardown:
error = xfs_scrub_teardown(&sc, ip, error);
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 3218664..0713eda 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -26,9 +26,19 @@ struct xfs_scrub_meta_ops {
int (*setup)(struct xfs_scrub_context *,
struct xfs_inode *);
int (*scrub)(struct xfs_scrub_context *);
+ int (*repair)(struct xfs_scrub_context *);
bool (*has)(struct xfs_sb *);
};
+/* True if userspace set IFLAG_REPAIR /and/ scrub flagged something to fix. */
+static inline bool xfs_scrub_should_fix(struct xfs_scrub_metadata *sm)
+{
+ return (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && /* caller asked */
+ (sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+ XFS_SCRUB_OFLAG_XCORRUPT |
+ XFS_SCRUB_OFLAG_PREEN)); /* any one of these suffices */
+}
+
/* Buffer pointers and btree cursors for an entire AG. */
struct xfs_scrub_ag {
xfs_agnumber_t agno;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 8cebbaa..5ff86c4 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -57,6 +57,7 @@ static unsigned int xfs_errortag_random_default[] = {
XFS_RANDOM_AG_RESV_CRITICAL,
XFS_RANDOM_DROP_WRITES,
XFS_RANDOM_LOG_BAD_CRC,
+ XFS_RANDOM_FORCE_SCRUB_REPAIR,
};
struct xfs_errortag_attr {
@@ -161,6 +162,7 @@ XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE);
XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL);
XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES);
XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC);
+XFS_ERRORTAG_ATTR_RW(force_repair, XFS_ERRTAG_FORCE_SCRUB_REPAIR);
static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -193,6 +195,7 @@ static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
XFS_ERRORTAG_ATTR_LIST(drop_writes),
XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
+ XFS_ERRORTAG_ATTR_LIST(force_repair),
NULL,
};
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 7577be5..6ee23eb 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -106,7 +106,8 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
*/
#define XFS_ERRTAG_DROP_WRITES 28
#define XFS_ERRTAG_LOG_BAD_CRC 29
-#define XFS_ERRTAG_MAX 30
+#define XFS_ERRTAG_FORCE_SCRUB_REPAIR 30
+#define XFS_ERRTAG_MAX 31
/*
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -141,6 +142,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_RANDOM_AG_RESV_CRITICAL 4
#define XFS_RANDOM_DROP_WRITES 1
#define XFS_RANDOM_LOG_BAD_CRC 1
+#define XFS_RANDOM_FORCE_SCRUB_REPAIR 1
#ifdef DEBUG
extern int xfs_errortag_init(struct xfs_mount *mp);
next prev parent reply other threads:[~2017-08-25 22:17 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-25 22:16 [PATCH v9 00/19] xfs: online fs repair support Darrick J. Wong
2017-08-25 22:16 ` [PATCH 01/19] xfs: add helpers to calculate btree size Darrick J. Wong
2017-08-25 22:17 ` [PATCH 02/19] xfs: expose various functions to repair code Darrick J. Wong
2017-08-25 22:17 ` [PATCH 03/19] xfs: add repair helpers for the reverse mapping btree Darrick J. Wong
2017-08-25 22:17 ` [PATCH 04/19] xfs: add repair helpers for the reference count btree Darrick J. Wong
2017-08-25 22:17 ` [PATCH 05/19] xfs: add BMAPI_NORMAP flag to perform block remapping without updating rmapbt Darrick J. Wong
2017-08-25 22:17 ` [PATCH 06/19] xfs: halt auto-reclamation activities while rebuilding rmap Darrick J. Wong
2017-08-25 22:17 ` [PATCH 07/19] xfs: create tracepoints for online repair Darrick J. Wong
2017-08-25 22:17 ` Darrick J. Wong [this message]
2017-08-25 22:17 ` [PATCH 09/19] xfs: add helper routines for the repair code Darrick J. Wong
2017-08-25 22:17 ` [PATCH 10/19] xfs: repair superblocks Darrick J. Wong
2017-08-25 22:18 ` [PATCH 11/19] xfs: repair the AGF and AGFL Darrick J. Wong
2017-08-25 22:18 ` [PATCH 12/19] xfs: rebuild the AGI Darrick J. Wong
2017-08-25 22:18 ` [PATCH 13/19] xfs: repair free space btrees Darrick J. Wong
2017-08-25 22:18 ` [PATCH 14/19] xfs: repair inode btrees Darrick J. Wong
2017-08-25 22:18 ` [PATCH 15/19] xfs: rebuild the rmapbt Darrick J. Wong
2017-08-25 22:18 ` [PATCH 16/19] xfs: repair refcount btrees Darrick J. Wong
2017-08-25 22:18 ` [PATCH 17/19] xfs: online repair of inodes Darrick J. Wong
2017-08-25 22:18 ` [PATCH 18/19] xfs: repair inode block maps Darrick J. Wong
2017-08-25 22:18 ` [PATCH 19/19] xfs: repair damaged symlinks Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=150369946542.9957.12755552154555796419.stgit@magnolia \
--to=darrick.wong@oracle.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).