All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 03/19] xfs: create an ioctl to scrub AG metadata
Date: Fri, 02 Jun 2017 15:31:05 -0700	[thread overview]
Message-ID: <149644266587.19430.995760719228449517.stgit@birch.djwong.org> (raw)
In-Reply-To: <149644264601.19430.6064928774771048026.stgit@birch.djwong.org>

From: Darrick J. Wong <darrick.wong@oracle.com>

Create an ioctl that can be used to scrub internal filesystem metadata.
The new ioctl takes the metadata type, an (optional) AG number, an
(optional) inode number and generation, and a flags argument.  This will
be used by the upcoming XFS online scrub tool.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/Makefile          |    5 
 fs/xfs/libxfs/xfs_fs.h   |   39 ++++
 fs/xfs/scrub/common.c    |  508 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/common.h    |  165 +++++++++++++++
 fs/xfs/scrub/xfs_scrub.h |   29 +++
 fs/xfs/xfs_ioctl.c       |   28 +++
 fs/xfs/xfs_ioctl32.c     |    1 
 fs/xfs/xfs_trace.h       |    7 +
 8 files changed, 781 insertions(+), 1 deletion(-)
 create mode 100644 fs/xfs/scrub/common.c
 create mode 100644 fs/xfs/scrub/common.h
 create mode 100644 fs/xfs/scrub/xfs_scrub.h


diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 65bf642..b9bacf2f 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -102,6 +102,11 @@ xfs-y				+= xfs_aops.o \
 				   kmem.o \
 				   uuid.o
 
+# online scrub/repair
+xfs-$(CONFIG_XFS_DEBUG)		+= $(addprefix scrub/, \
+				   common.o \
+				   )
+
 # low-level transaction/log code
 xfs-y				+= xfs_log.o \
 				   xfs_log_cil.o \
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index b0d3099..4bff411 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -472,6 +472,44 @@ typedef struct xfs_swapext
 #define XFS_FSOP_GOING_FLAGS_LOGFLUSH		0x1	/* flush log but not data */
 #define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH		0x2	/* don't flush log nor data */
 
+/* metadata scrubbing */
+struct xfs_scrub_metadata {
+	__u32 sm_type;		/* What to check? */
+	__u32 sm_flags;		/* flags; see below. */
+	union {
+		__u32		__agno;
+		struct {
+			__u64	__ino;
+			__u32	__gen;
+		} i;
+		__u64		__reserved[7];	/* pad to 64 bytes */
+	} p;
+};
+#define sm_agno	p.__agno
+#define sm_ino	p.i.__ino
+#define sm_gen	p.i.__gen
+
+/*
+ * Metadata types and flags for scrub operation.
+ */
+#define XFS_SCRUB_TYPE_TEST	0	/* dummy to test ioctl */
+#define XFS_SCRUB_TYPE_MAX	0
+
+#define XFS_SCRUB_FLAG_REPAIR	0x01	/* i: repair this metadata */
+#define XFS_SCRUB_FLAG_CORRUPT	0x02	/* o: needs repair */
+#define XFS_SCRUB_FLAG_PREEN	0x04	/* o: could be optimized */
+#define XFS_SCRUB_FLAG_XFAIL	0x08	/* o: incomplete xref */
+#define XFS_SCRUB_FLAG_XCORRUPT	0x10	/* o: corruption during xref */
+#define XFS_SCRUB_FLAG_INCOMPLETE	0x20	/* o: scan was not complete */
+
+#define XFS_SCRUB_FLAGS_IN	(XFS_SCRUB_FLAG_REPAIR)
+#define XFS_SCRUB_FLAGS_OUT	(XFS_SCRUB_FLAG_CORRUPT | \
+				 XFS_SCRUB_FLAG_PREEN | \
+				 XFS_SCRUB_FLAG_XFAIL | \
+				 XFS_SCRUB_FLAG_XCORRUPT | \
+				 XFS_SCRUB_FLAG_INCOMPLETE)
+#define XFS_SCRUB_FLAGS_ALL	(XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
+
 /*
  * AG reserved block counters
  */
@@ -524,6 +562,7 @@ struct xfs_fsop_ag_resblks {
 #define XFS_IOC_ZERO_RANGE	_IOW ('X', 57, struct xfs_flock64)
 #define XFS_IOC_FREE_EOFBLOCKS	_IOR ('X', 58, struct xfs_fs_eofblocks)
 /*	XFS_IOC_GETFSMAP ------ hoisted 59         */
+#define XFS_IOC_SCRUB_METADATA	_IOWR('X', 60, struct xfs_scrub_metadata)
 
 /*
  * ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
new file mode 100644
index 0000000..d2cdf98
--- /dev/null
+++ b/fs/xfs/scrub/common.c
@@ -0,0 +1,508 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/common.h"
+
+/*
+ * Online Scrub and Repair
+ *
+ * Traditionally, XFS (the kernel driver) did not know how to check or
+ * repair on-disk data structures.  That task was left to the xfs_check
+ * and xfs_repair tools, both of which require taking the filesystem
+ * offline for a thorough but time consuming examination.  Online
+ * scrub & repair, on the other hand, enables us to check the metadata
+ * for obvious errors while carefully stepping around the filesystem's
+ * ongoing operations, locking rules, etc.
+ *
+ * Given that most XFS metadata consist of records stored in a btree,
+ * most of the checking functions iterate the btree blocks themselves
+ * looking for irregularities.  When a record block is encountered, each
+ * record can be checked for obviously bad values.  Record values can
+ * also be cross-referenced against other btrees to look for potential
+ * misunderstandings between pieces of metadata.
+ *
+ * It is expected that the checkers responsible for per-AG metadata
+ * structures will lock the AG headers (AGI, AGF, AGFL), iterate the
+ * metadata structure, and perform any relevant cross-referencing before
+ * unlocking the AG and returning the results to userspace.  These
+ * scrubbers must not keep an AG locked for too long to avoid tying up
+ * the block and inode allocators.
+ *
+ * Block maps and b-trees rooted in an inode present a special challenge
+ * because they can involve extents from any AG.  The general scrubber
+ * structure of lock -> check -> xref -> unlock still holds, but AG
+ * locking order rules /must/ be obeyed to avoid deadlocks.  The
+ * ordering rule, of course, is that we must lock in increasing AG
+ * order.  Helper functions are provided to track which AG headers we've
+ * already locked.  If we detect an imminent locking order violation, we
+ * can signal a potential deadlock, in which case the scrubber can jump
+ * out to the top level, lock all the AGs in order, and retry the scrub.
+ *
+ * For file data (directories, extended attributes, symlinks) scrub, we
+ * can simply lock the inode and walk the data.  For btree data
+ * (directories and attributes) we follow the same btree-scrubbing
+ * strategy outlined previously to check the records.
+ *
+ * We use a bit of trickery with transactions to avoid buffer deadlocks
+ * if there is a cycle in the metadata.  The basic problem is that
+ * travelling down a btree involves locking the current buffer at each
+ * tree level.  If a pointer should somehow point back to a buffer that
+ * we've already examined, we will deadlock due to the second buffer
+ * locking attempt.  Note however that grabbing a buffer in transaction
+ * context links the locked buffer to the transaction.  If we try to
+ * re-grab the buffer in the context of the same transaction, we avoid
+ * the second lock attempt and continue.  Between the verifier and the
+ * scrubber, something will notice that something is amiss and report
+ * the corruption.  Therefore, each scrubber will allocate an empty
+ * transaction, attach buffers to it, and cancel the transaction at the
+ * end of the scrub run.  Cancelling a non-dirty transaction simply
+ * unlocks the buffers.
+ *
+ * There are four pieces of data that scrub can communicate to
+ * userspace.  The first is the error code (errno), which can be used to
+ * communicate operational errors in performing the scrub.  There are
+ * also three flags that can be set in the scrub context.  If the data
+ * structure itself is corrupt, the CORRUPT flag will be set.  If
+ * the metadata is correct but otherwise suboptimal, the PREEN flag
+ * will be set.
+ */
+
+struct xfs_scrub_meta_fns {
+	int		(*setup)(struct xfs_scrub_context *,
+				 struct xfs_inode *);
+	int		(*scrub)(struct xfs_scrub_context *);
+	bool		(*has)(struct xfs_sb *);
+};
+
+/* Check for operational errors. */
+bool
+xfs_scrub_op_ok(
+	struct xfs_scrub_context	*sc,
+	xfs_agnumber_t			agno,
+	xfs_agblock_t			bno,
+	const char			*type,
+	int				*error,
+	const char			*func,
+	int				line)
+{
+	struct xfs_mount		*mp = sc->mp;
+
+	switch (*error) {
+	case 0:
+		return true;
+	case -EDEADLOCK:
+		/* Used to restart an op with deadlock avoidance. */
+		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
+		break;
+	case -EFSBADCRC:
+	case -EFSCORRUPTED:
+		/* Note the badness but don't abort. */
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+		*error = 0;
+		/* fall through */
+	default:
+		trace_xfs_scrub_op_error(mp, agno, bno, type, *error, func,
+				line);
+		break;
+	}
+	return false;
+}
+
+/* Check for operational errors for a file offset. */
+bool
+xfs_scrub_file_op_ok(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_fileoff_t			offset,
+	const char			*type,
+	int				*error,
+	const char			*func,
+	int				line)
+{
+	switch (*error) {
+	case 0:
+		return true;
+	case -EDEADLOCK:
+		/* Used to restart an op with deadlock avoidance. */
+		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
+		break;
+	case -EFSBADCRC:
+	case -EFSCORRUPTED:
+		/* Note the badness but don't abort. */
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+		*error = 0;
+		/* fall through */
+	default:
+		trace_xfs_scrub_file_op_error(sc->ip, whichfork, offset, type,
+				*error, func, line);
+		break;
+	}
+	return false;
+}
+
+/* Check for metadata block optimization possibilities. */
+bool
+xfs_scrub_block_preen(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	struct xfs_mount		*mp = sc->mp;
+	xfs_fsblock_t			fsbno;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			bno;
+
+	if (fs_ok)
+		return fs_ok;
+
+	fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+	agno = XFS_FSB_TO_AGNO(mp, fsbno);
+	bno = XFS_FSB_TO_AGBNO(mp, fsbno);
+
+	sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
+	trace_xfs_scrub_block_preen(mp, agno, bno, type, check, func, line);
+	return fs_ok;
+}
+
+/* Check for metadata block corruption. */
+bool
+xfs_scrub_block_ok(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	struct xfs_mount		*mp = sc->mp;
+	xfs_fsblock_t			fsbno;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			bno;
+
+	if (fs_ok)
+		return fs_ok;
+
+	fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+	agno = XFS_FSB_TO_AGNO(mp, fsbno);
+	bno = XFS_FSB_TO_AGBNO(mp, fsbno);
+
+	sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+	trace_xfs_scrub_block_error(mp, agno, bno, type, check, func, line);
+	return fs_ok;
+}
+
+/* Check for inode metadata corruption. */
+bool
+xfs_scrub_ino_ok(
+	struct xfs_scrub_context	*sc,
+	xfs_ino_t			ino,
+	struct xfs_buf			*bp,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	struct xfs_inode		*ip = sc->ip;
+	struct xfs_mount		*mp = sc->mp;
+	xfs_fsblock_t			fsbno;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			bno;
+
+	if (fs_ok)
+		return fs_ok;
+
+	if (bp) {
+		fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+		agno = XFS_FSB_TO_AGNO(mp, fsbno);
+		bno = XFS_FSB_TO_AGBNO(mp, fsbno);
+	} else {
+		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+		bno = XFS_INO_TO_AGINO(mp, ip->i_ino);
+	}
+
+	sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+	trace_xfs_scrub_ino_error(mp, ino, agno, bno, type, check, func, line);
+	return fs_ok;
+}
+
+/* Check for inode metadata optimization possibilities. */
+bool
+xfs_scrub_ino_preen(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	struct xfs_inode		*ip = sc->ip;
+	struct xfs_mount		*mp = sc->mp;
+	xfs_fsblock_t			fsbno;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			bno;
+
+	if (fs_ok)
+		return fs_ok;
+
+	if (bp) {
+		fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+		agno = XFS_FSB_TO_AGNO(mp, fsbno);
+		bno = XFS_FSB_TO_AGBNO(mp, fsbno);
+	} else {
+		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+		bno = XFS_INO_TO_AGINO(mp, ip->i_ino);
+	}
+
+	sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
+	trace_xfs_scrub_ino_preen(mp, ip->i_ino, agno, bno, type, check,
+			func, line);
+	return fs_ok;
+}
+
+/* Check for file data block corruption. */
+bool
+xfs_scrub_data_ok(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_fileoff_t			offset,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	if (fs_ok)
+		return fs_ok;
+
+	sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+	trace_xfs_scrub_data_error(sc->ip, whichfork, offset, type, check,
+			func, line);
+	return fs_ok;
+}
+
+/* Signal an incomplete scrub. */
+bool
+xfs_scrub_incomplete(
+	struct xfs_scrub_context	*sc,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	if (fs_ok)
+		return fs_ok;
+
+	sc->sm->sm_flags |= XFS_SCRUB_FLAG_INCOMPLETE;
+	trace_xfs_scrub_incomplete(sc->mp, type, check, func, line);
+	return fs_ok;
+}
+
+/* Dummy scrubber */
+
+STATIC int
+xfs_scrub_dummy(
+	struct xfs_scrub_context	*sc)
+{
+	if (sc->sm->sm_gen & XFS_SCRUB_FLAG_CORRUPT)
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+	if (sc->sm->sm_gen & XFS_SCRUB_FLAG_PREEN)
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
+	if (sc->sm->sm_gen & XFS_SCRUB_FLAG_XFAIL)
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_XFAIL;
+	if (sc->sm->sm_gen & XFS_SCRUB_FLAG_XCORRUPT)
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_XCORRUPT;
+	if (sc->sm->sm_gen & ~XFS_SCRUB_FLAGS_OUT)
+		return -ENOENT;
+
+	return 0;
+}
+
+/* Per-scrubber setup functions */
+
+/* Set us up with a transaction and an empty context. */
+int
+xfs_scrub_setup_fs(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	return xfs_scrub_trans_alloc(sc->sm, sc->mp,
+			&M_RES(sc->mp)->tr_itruncate, 0, 0, 0, &sc->tp);
+}
+
+/* Scrub setup and teardown */
+
+/* Free all the resources and finish the transactions. */
+STATIC int
+xfs_scrub_teardown(
+	struct xfs_scrub_context	*sc,
+	int				error)
+{
+	if (sc->tp) {
+		xfs_trans_cancel(sc->tp);
+		sc->tp = NULL;
+	}
+	return error;
+}
+
+/* Perform common scrub context initialization. */
+STATIC int
+xfs_scrub_setup(
+	struct xfs_inode		*ip,
+	struct xfs_scrub_context	*sc,
+	const struct xfs_scrub_meta_fns	*fns,
+	struct xfs_scrub_metadata	*sm,
+	bool				try_harder)
+{
+	memset(sc, 0, sizeof(*sc));
+	sc->mp = ip->i_mount;
+	sc->sm = sm;
+	sc->fns = fns;
+	sc->try_harder = try_harder;
+
+	return sc->fns->setup(sc, ip);
+}
+
+/* Scrubbing dispatch. */
+
+static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
+	{ /* dummy verifier */
+		.setup	= xfs_scrub_setup_fs,
+		.scrub	= xfs_scrub_dummy,
+	},
+};
+
+/* Dispatch metadata scrubbing. */
+int
+xfs_scrub_metadata(
+	struct xfs_inode		*ip,
+	struct xfs_scrub_metadata	*sm)
+{
+	struct xfs_scrub_context	sc;
+	struct xfs_mount		*mp = ip->i_mount;
+	const struct xfs_scrub_meta_fns	*fns;
+	bool				try_harder = false;
+	int				error = 0;
+
+	trace_xfs_scrub(ip, sm, error);
+
+	/* Forbidden if we are shut down or mounted norecovery. */
+	error = -ESHUTDOWN;
+	if (XFS_FORCED_SHUTDOWN(mp))
+		goto out;
+	error = -ENOTRECOVERABLE;
+	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+		goto out;
+
+	/* Check our inputs. */
+	error = -EINVAL;
+	sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+	if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
+		goto out;
+
+	/* Do we know about this type of metadata? */
+	error = -ENOENT;
+	if (sm->sm_type > XFS_SCRUB_TYPE_MAX)
+		goto out;
+	fns = &meta_scrub_fns[sm->sm_type];
+	if (fns->scrub == NULL)
+		goto out;
+
+	/* Does this fs even support this type of metadata? */
+	if (fns->has && !fns->has(&mp->m_sb))
+		goto out;
+
+	/* We don't know how to repair anything yet. */
+	error = -EOPNOTSUPP;
+	if (sm->sm_flags & XFS_SCRUB_FLAG_REPAIR)
+		goto out;
+
+	/* This isn't a stable feature.  Use with care. */
+	{
+		static bool warned;
+
+		if (!warned)
+			xfs_alert(mp,
+	"EXPERIMENTAL online scrub feature in use. Use at your own risk!");
+		warned = true;
+	}
+
+retry_op:
+	/* Set up for the operation. */
+	error = xfs_scrub_setup(ip, &sc, fns, sm, try_harder);
+	if (error)
+		goto out_teardown;
+
+	/* Scrub for errors. */
+	error = fns->scrub(&sc);
+	if (!try_harder && error == -EDEADLOCK) {
+		/*
+		 * Scrubbers return -EDEADLOCK to mean 'try harder'.
+		 * Tear down everything we hold, then set up again with
+		 * preparation for worst-case scenarios.
+		 */
+		error = xfs_scrub_teardown(&sc, 0);
+		if (error)
+			goto out;
+		try_harder = true;
+		goto retry_op;
+	} else if (error)
+		goto out_teardown;
+
+	if (xfs_scrub_found_corruption(sm))
+		xfs_alert_ratelimited(mp, "Corruption detected during scrub.");
+
+out_teardown:
+	error = xfs_scrub_teardown(&sc, error);
+out:
+	trace_xfs_scrub_done(ip, sm, error);
+	return error;
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
new file mode 100644
index 0000000..7d2c4bf
--- /dev/null
+++ b/fs/xfs/scrub/common.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __XFS_REPAIR_COMMON_H__
+#define __XFS_REPAIR_COMMON_H__
+
+/* Did we find something broken? */
+static inline bool xfs_scrub_found_corruption(struct xfs_scrub_metadata *sm)
+{
+	return sm->sm_flags & (XFS_SCRUB_FLAG_CORRUPT |
+			       XFS_SCRUB_FLAG_XCORRUPT);
+}
+
+struct xfs_scrub_context {
+	/* General scrub state. */
+	struct xfs_mount		*mp;
+	struct xfs_scrub_metadata	*sm;
+	const struct xfs_scrub_meta_fns	*fns;
+	struct xfs_trans		*tp;
+	struct xfs_inode		*ip;
+	bool				try_harder;
+};
+
+/* Should we end the scrub early? */
+static inline bool
+xfs_scrub_should_terminate(
+	int		*error)
+{
+	if (fatal_signal_pending(current)) {
+		if (*error == 0)
+			*error = -EAGAIN;
+		return true;
+	}
+	return false;
+}
+
+/*
+ * Grab a transaction.  If we're going to repair something, we need to
+ * ensure there's enough reservation to make all the changes.  If not,
+ * we can use an empty transaction.
+ */
+static inline int
+xfs_scrub_trans_alloc(
+	struct xfs_scrub_metadata	*sm,
+	struct xfs_mount		*mp,
+	struct xfs_trans_res		*resp,
+	uint				blocks,
+	uint				rtextents,
+	uint				flags,
+	struct xfs_trans		**tpp)
+{
+	return xfs_trans_alloc_empty(mp, tpp);
+}
+
+/* Check for operational errors. */
+bool xfs_scrub_op_ok(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+		     xfs_agblock_t bno, const char *type, int *error,
+		     const char	*func, int line);
+#define XFS_SCRUB_OP_ERROR_GOTO(sc, agno, bno, type, error, label) \
+	do { \
+		if (!xfs_scrub_op_ok((sc), (agno), (bno), (type), \
+				(error), __func__, __LINE__)) \
+			goto label; \
+	} while (0)
+
+/* Check for operational errors for a file offset. */
+bool xfs_scrub_file_op_ok(struct xfs_scrub_context *sc, int whichfork,
+			  xfs_fileoff_t offset, const char *type,
+			  int *error, const char *func, int line);
+#define XFS_SCRUB_FILE_OP_ERROR_GOTO(sc, which, off, type, error, label) \
+	do { \
+		if (!xfs_scrub_file_op_ok((sc), (which), (off), (type), \
+				(error), __func__, __LINE__)) \
+			goto label; \
+	} while (0)
+
+/* Check for metadata block optimization possibilities. */
+bool xfs_scrub_block_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp,
+			   const char *type, bool fs_ok, const char *check,
+			   const char *func, int line);
+#define XFS_SCRUB_PREEN(sc, bp, type, fs_ok) \
+	xfs_scrub_block_preen((sc), (bp), (type), (fs_ok), #fs_ok, \
+			__func__, __LINE__)
+
+/* Check for inode metadata optimization possibilities. */
+bool xfs_scrub_ino_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp,
+		      const char *type, bool fs_ok, const char *check,
+		      const char *func, int line);
+#define XFS_SCRUB_INO_PREEN(sc, bp, type, fs_ok) \
+	xfs_scrub_ino_preen((sc), (bp), (type), (fs_ok), #fs_ok, \
+			__func__, __LINE__)
+
+/* Check for metadata block corruption. */
+bool xfs_scrub_block_ok(struct xfs_scrub_context *sc, struct xfs_buf *bp,
+			const char *type, bool fs_ok, const char *check,
+			const char *func, int line);
+#define XFS_SCRUB_CHECK(sc, bp, type, fs_ok) \
+	xfs_scrub_block_ok((sc), (bp), (type), (fs_ok), #fs_ok, \
+			__func__, __LINE__)
+#define XFS_SCRUB_GOTO(sc, bp, type, fs_ok, label) \
+	do { \
+		if (!xfs_scrub_block_ok((sc), (bp), (type), (fs_ok), \
+				#fs_ok, __func__, __LINE__)) \
+			goto label; \
+	} while (0)
+
+/* Check for inode metadata corruption. */
+bool xfs_scrub_ino_ok(struct xfs_scrub_context *sc, xfs_ino_t ino,
+		      struct xfs_buf *bp, const char *type, bool fs_ok,
+		      const char *check, const char *func, int line);
+#define XFS_SCRUB_INO_CHECK(sc, ino, bp, type, fs_ok) \
+	xfs_scrub_ino_ok((sc), (ino), (bp), (type), (fs_ok), #fs_ok, \
+			__func__, __LINE__)
+#define XFS_SCRUB_INO_GOTO(sc, ino, bp, type, fs_ok, label) \
+	do { \
+		if (!xfs_scrub_ino_ok((sc), (ino), (bp), (type), (fs_ok), \
+				#fs_ok, __func__, __LINE__)) \
+			goto label; \
+	} while (0)
+
+/* Check for file data block corruption. */
+bool xfs_scrub_data_ok(struct xfs_scrub_context *sc, int whichfork,
+		       xfs_fileoff_t offset, const char *type, bool fs_ok,
+		       const char *check, const char *func, int line);
+#define XFS_SCRUB_DATA_CHECK(sc, whichfork, offset, type, fs_ok) \
+	xfs_scrub_data_ok((sc), (whichfork), (offset), (type), (fs_ok), \
+			#fs_ok, __func__, __LINE__)
+#define XFS_SCRUB_DATA_GOTO(sc, whichfork, offset, type, fs_ok, label) \
+	do { \
+		if (!xfs_scrub_data_ok((sc), (whichfork), (offset), \
+				(type), (fs_ok), #fs_ok, __func__, __LINE__)) \
+			goto label; \
+	} while (0)
+
+/* Signal an incomplete scrub. */
+bool xfs_scrub_incomplete(struct xfs_scrub_context *sc, const char *type,
+			  bool fs_ok, const char *check, const char *func,
+			  int line);
+#define XFS_SCRUB_INCOMPLETE(sc, type, fs_ok) \
+	xfs_scrub_incomplete((sc), (type), (fs_ok), \
+			#fs_ok, __func__, __LINE__)
+
+/* Setup functions */
+
+#define SETUP_FN(name) int name(struct xfs_scrub_context *sc, struct xfs_inode *ip)
+SETUP_FN(xfs_scrub_setup_fs);
+#undef SETUP_FN
+
+#endif	/* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/scrub/xfs_scrub.h b/fs/xfs/scrub/xfs_scrub.h
new file mode 100644
index 0000000..64e21b4
--- /dev/null
+++ b/fs/xfs/scrub/xfs_scrub.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __XFS_SCRUB_H__
+#define __XFS_SCRUB_H__
+
+#ifndef CONFIG_XFS_DEBUG
+# define xfs_scrub_metadata(ip, sm)	(-ENOTTY)
+#else
+int xfs_scrub_metadata(struct xfs_inode *ip, struct xfs_scrub_metadata *sm);
+#endif /* CONFIG_XFS_DEBUG */
+
+#endif	/* __XFS_SCRUB_H__ */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index f8e894b..2777d90 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -44,6 +44,7 @@
 #include "xfs_btree.h"
 #include <linux/fsmap.h>
 #include "xfs_fsmap.h"
+#include "scrub/xfs_scrub.h"
 
 #include <linux/capability.h>
 #include <linux/cred.h>
@@ -1690,6 +1691,30 @@ xfs_ioc_getfsmap(
 	return 0;
 }
 
+STATIC int
+xfs_ioc_scrub_metadata(
+	struct xfs_inode		*ip,
+	void				__user *arg)
+{
+	struct xfs_scrub_metadata	scrub;
+	int				error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&scrub, arg, sizeof(scrub)))
+		return -EFAULT;
+
+	error = xfs_scrub_metadata(ip, &scrub);
+	if (error)
+		return error;
+
+	if (copy_to_user(arg, &scrub, sizeof(scrub)))
+		return -EFAULT;
+
+	return 0;
+}
+
 int
 xfs_ioc_swapext(
 	xfs_swapext_t	*sxp)
@@ -1873,6 +1898,9 @@ xfs_file_ioctl(
 	case FS_IOC_GETFSMAP:
 		return xfs_ioc_getfsmap(ip, arg);
 
+	case XFS_IOC_SCRUB_METADATA:
+		return xfs_ioc_scrub_metadata(ip, arg);
+
 	case XFS_IOC_FD_TO_HANDLE:
 	case XFS_IOC_PATH_TO_HANDLE:
 	case XFS_IOC_PATH_TO_FSHANDLE: {
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index e8b4de3..972d4bd 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -557,6 +557,7 @@ xfs_file_compat_ioctl(
 	case XFS_IOC_ERROR_CLEARALL:
 	case FS_IOC_GETFSMAP:
 	case XFS_IOC_GET_AG_RESBLKS:
+	case XFS_IOC_SCRUB_METADATA:
 		return xfs_file_ioctl(filp, cmd, p);
 #ifndef BROKEN_X86_ALIGNMENT
 	/* These are handled fine if no alignment issues */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7ab1bd3..0ee8add 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3330,7 +3330,7 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
 
 /* scrub */
 #define XFS_SCRUB_TYPE_DESC \
-	{ 0, NULL }
+	{ XFS_SCRUB_TYPE_TEST,		"dummy" }
 DECLARE_EVENT_CLASS(xfs_scrub_class,
 	TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
 		 int error),
@@ -3348,6 +3348,11 @@ DECLARE_EVENT_CLASS(xfs_scrub_class,
 	TP_fast_assign(
 		__entry->dev = ip->i_mount->m_super->s_dev;
 		__entry->ino = ip->i_ino;
+		__entry->type = sm->sm_type;
+		__entry->agno = sm->sm_agno;
+		__entry->inum = sm->sm_ino;
+		__entry->gen = sm->sm_gen;
+		__entry->flags = sm->sm_flags;
 		__entry->error = error;
 	),
 	TP_printk("dev %d:%d ino %llu type %s agno %u inum %llu gen %u flags 0x%x error %d",


  parent reply	other threads:[~2017-06-02 22:31 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-02 22:30 [PATCH v7 00/19] xfs: online scrub support Darrick J. Wong
2017-06-02 22:30 ` [PATCH 01/19] xfs: query the per-AG reservation counters Darrick J. Wong
2017-06-02 22:30 ` [PATCH 02/19] xfs: add scrub tracepoints Darrick J. Wong
2017-06-02 22:31 ` Darrick J. Wong [this message]
2017-06-02 22:31 ` [PATCH 04/19] xfs: generic functions to scrub metadata and btrees Darrick J. Wong
2017-06-02 22:31 ` [PATCH 05/19] xfs: scrub the backup superblocks Darrick J. Wong
2017-06-02 22:31 ` [PATCH 06/19] xfs: scrub AGF and AGFL Darrick J. Wong
2017-06-02 22:31 ` [PATCH 07/19] xfs: scrub the AGI Darrick J. Wong
2017-06-02 22:31 ` [PATCH 08/19] xfs: scrub free space btrees Darrick J. Wong
2017-06-02 22:31 ` [PATCH 09/19] xfs: scrub inode btrees Darrick J. Wong
2017-06-02 22:31 ` [PATCH 10/19] xfs: scrub rmap btrees Darrick J. Wong
2017-06-02 22:31 ` [PATCH 11/19] xfs: scrub refcount btrees Darrick J. Wong
2017-06-02 22:32 ` [PATCH 12/19] xfs: scrub inodes Darrick J. Wong
2017-06-02 22:32 ` [PATCH 13/19] xfs: scrub inode block mappings Darrick J. Wong
2017-06-02 22:32 ` [PATCH 14/19] xfs: scrub directory/attribute btrees Darrick J. Wong
2017-06-02 22:32 ` [PATCH 15/19] xfs: scrub directory metadata Darrick J. Wong
2017-06-02 22:32 ` [PATCH 16/19] xfs: scrub directory freespace Darrick J. Wong
2017-06-02 22:32 ` [PATCH 17/19] xfs: scrub extended attributes Darrick J. Wong
2017-06-02 22:32 ` [PATCH 18/19] xfs: scrub symbolic links Darrick J. Wong
2017-06-02 22:32 ` [PATCH 19/19] xfs: scrub realtime bitmap/summary Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2017-03-10 23:19 [PATCH v6A 00/19] xfs: online scrub support Darrick J. Wong
2017-03-10 23:20 ` [PATCH 03/19] xfs: create an ioctl to scrub AG metadata Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=149644266587.19430.995760719228449517.stgit@birch.djwong.org \
    --to=darrick.wong@oracle.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.