linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Allison Henderson <allison.henderson@oracle.com>
To: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: linux-xfs@vger.kernel.org
Subject: Re: [PATCH 03/22] xfs: create an ioctl to scrub AG metadata
Date: Sun, 23 Jul 2017 09:37:56 -0700	[thread overview]
Message-ID: <41bd3e4e-1bc0-b5cc-1f8e-6ac9ca7f3b08@oracle.com> (raw)
In-Reply-To: <150061192762.14732.9274339959944172701.stgit@magnolia>

Reviewed-by: Allison Henderson <allison.henderson@oracle.com>

On 7/20/2017 9:38 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Create an ioctl that can be used to scrub internal filesystem metadata.
> The new ioctl takes the metadata type, an (optional) AG number, an
> (optional) inode number and generation, and a flags argument.  This will
> be used by the upcoming XFS online scrub tool.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/Kconfig           |   17 +
>  fs/xfs/Makefile          |    7 +
>  fs/xfs/libxfs/xfs_fs.h   |   41 ++++
>  fs/xfs/scrub/common.c    |  533 ++++++++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/scrub/common.h    |  179 +++++++++++++++
>  fs/xfs/scrub/xfs_scrub.h |   29 +++
>  fs/xfs/xfs_ioctl.c       |   28 ++
>  fs/xfs/xfs_ioctl32.c     |    1
>  fs/xfs/xfs_trace.h       |    7 +
>  9 files changed, 841 insertions(+), 1 deletion(-)
>  create mode 100644 fs/xfs/scrub/common.c
>  create mode 100644 fs/xfs/scrub/common.h
>  create mode 100644 fs/xfs/scrub/xfs_scrub.h
>
>
> diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
> index 1b98cfa..f42fcf1 100644
> --- a/fs/xfs/Kconfig
> +++ b/fs/xfs/Kconfig
> @@ -71,6 +71,23 @@ config XFS_RT
>
>  	  If unsure, say N.
>
> +config XFS_ONLINE_SCRUB
> +	bool "XFS online metadata check support"
> +	default n
> +	depends on XFS_FS
> +	help
> +	  If you say Y here you will be able to check metadata on a
> +	  mounted XFS filesystem.  This feature is intended to reduce
> +	  filesystem downtime by supplementing xfs_repair.  The key
> +	  advantage here is to look for problems proactively so that
> +	  they can be dealt with in a controlled manner.
> +
> +	  This feature is considered EXPERIMENTAL.  Use with caution!
> +
> +	  See the xfs_scrub man page in section 8 for additional information.
> +
> +	  If unsure, say N.
> +
>  config XFS_WARN
>  	bool "XFS Verbose Warnings"
>  	depends on XFS_FS && !XFS_DEBUG
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 5b959ee..c4fdaa2 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -136,3 +136,10 @@ xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
>  xfs-$(CONFIG_SYSCTL)		+= xfs_sysctl.o
>  xfs-$(CONFIG_COMPAT)		+= xfs_ioctl32.o
>  xfs-$(CONFIG_EXPORTFS_BLOCK_OPS)	+= xfs_pnfs.o
> +
> +# online scrub/repair
> +ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
> +xfs-y				+= $(addprefix scrub/, \
> +				   common.o \
> +				   )
> +endif
> diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> index 5dedab9..aeccc99 100644
> --- a/fs/xfs/libxfs/xfs_fs.h
> +++ b/fs/xfs/libxfs/xfs_fs.h
> @@ -468,6 +468,46 @@ typedef struct xfs_swapext
>  #define XFS_FSOP_GOING_FLAGS_LOGFLUSH		0x1	/* flush log but not data */
>  #define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH		0x2	/* don't flush log nor data */
>
> +/* metadata scrubbing */
> +struct xfs_scrub_metadata {
> +	__u32 sm_type;		/* What to check? */
> +	__u32 sm_flags;		/* flags; see below. */
> +	__u64 sm_ino;		/* inode number. */
> +	__u32 sm_gen;		/* inode generation. */
> +	__u32 sm_agno;		/* ag number. */
> +	__u64 sm_reserved[5];	/* pad to 64 bytes */
> +};
> +
> +/*
> + * Metadata types and flags for scrub operation.
> + */
> +#define XFS_SCRUB_TYPE_TEST	0	/* dummy to test ioctl */
> +#define XFS_SCRUB_TYPE_MAX	0
> +
> +/* i: repair this metadata */
> +#define XFS_SCRUB_FLAG_REPAIR		(1 << 0)
> +/* o: metadata object needs repair */
> +#define XFS_SCRUB_FLAG_CORRUPT		(1 << 1)
> +/* o: metadata object could be optimized */
> +#define XFS_SCRUB_FLAG_PREEN		(1 << 2)
> +/* o: cross-referencing failed */
> +#define XFS_SCRUB_FLAG_XFAIL		(1 << 3)
> +/* o: metadata object disagrees with cross-referenced metadata */
> +#define XFS_SCRUB_FLAG_XCORRUPT		(1 << 4)
> +/* o: scan was not complete */
> +#define XFS_SCRUB_FLAG_INCOMPLETE	(1 << 5)
> +/* o: metadata object looked funny but isn't corrupt */
> +#define XFS_SCRUB_FLAG_WARNING		(1 << 6)
> +
> +#define XFS_SCRUB_FLAGS_IN	(XFS_SCRUB_FLAG_REPAIR)
> +#define XFS_SCRUB_FLAGS_OUT	(XFS_SCRUB_FLAG_CORRUPT | \
> +				 XFS_SCRUB_FLAG_PREEN | \
> +				 XFS_SCRUB_FLAG_XFAIL | \
> +				 XFS_SCRUB_FLAG_XCORRUPT | \
> +				 XFS_SCRUB_FLAG_INCOMPLETE | \
> +				 XFS_SCRUB_FLAG_WARNING)
> +#define XFS_SCRUB_FLAGS_ALL	(XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
> +
>  /*
>   * AG reserved block counters
>   */
> @@ -520,6 +560,7 @@ struct xfs_fsop_ag_resblks {
>  #define XFS_IOC_ZERO_RANGE	_IOW ('X', 57, struct xfs_flock64)
>  #define XFS_IOC_FREE_EOFBLOCKS	_IOR ('X', 58, struct xfs_fs_eofblocks)
>  /*	XFS_IOC_GETFSMAP ------ hoisted 59         */
> +#define XFS_IOC_SCRUB_METADATA	_IOWR('X', 60, struct xfs_scrub_metadata)
>
>  /*
>   * ioctl commands that replace IRIX syssgi()'s
> diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
> new file mode 100644
> index 0000000..6931793
> --- /dev/null
> +++ b/fs/xfs/scrub/common.c
> @@ -0,0 +1,533 @@
> +/*
> + * Copyright (C) 2017 Oracle.  All Rights Reserved.
> + *
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write the Free Software Foundation,
> + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
> + */
> +#include "xfs.h"
> +#include "xfs_fs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_defer.h"
> +#include "xfs_btree.h"
> +#include "xfs_bit.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans.h"
> +#include "xfs_trace.h"
> +#include "xfs_sb.h"
> +#include "xfs_inode.h"
> +#include "xfs_alloc.h"
> +#include "xfs_alloc_btree.h"
> +#include "xfs_bmap.h"
> +#include "xfs_bmap_btree.h"
> +#include "xfs_ialloc.h"
> +#include "xfs_ialloc_btree.h"
> +#include "xfs_refcount.h"
> +#include "xfs_refcount_btree.h"
> +#include "xfs_rmap.h"
> +#include "xfs_rmap_btree.h"
> +#include "scrub/xfs_scrub.h"
> +#include "scrub/common.h"
> +
> +/*
> + * Online Scrub and Repair
> + *
> + * Traditionally, XFS (the kernel driver) did not know how to check or
> + * repair on-disk data structures.  That task was left to the xfs_check
> + * and xfs_repair tools, both of which require taking the filesystem
> + * offline for a thorough but time consuming examination.  Online
> + * scrub & repair, on the other hand, enables us to check the metadata
> + * for obvious errors while carefully stepping around the filesystem's
> + * ongoing operations, locking rules, etc.
> + *
> + * Given that most XFS metadata consist of records stored in a btree,
> + * most of the checking functions iterate the btree blocks themselves
> + * looking for irregularities.  When a record block is encountered, each
> + * record can be checked for obviously bad values.  Record values can
> + * also be cross-referenced against other btrees to look for potential
> + * misunderstandings between pieces of metadata.
> + *
> + * It is expected that the checkers responsible for per-AG metadata
> + * structures will lock the AG headers (AGI, AGF, AGFL), iterate the
> + * metadata structure, and perform any relevant cross-referencing before
> + * unlocking the AG and returning the results to userspace.  These
> + * scrubbers must not keep an AG locked for too long to avoid tying up
> + * the block and inode allocators.
> + *
> + * Block maps and b-trees rooted in an inode present a special challenge
> + * because they can involve extents from any AG.  The general scrubber
> + * structure of lock -> check -> xref -> unlock still holds, but AG
> + * locking order rules /must/ be obeyed to avoid deadlocks.  The
> + * ordering rule, of course, is that we must lock in increasing AG
> + * order.  Helper functions are provided to track which AG headers we've
> + * already locked.  If we detect an imminent locking order violation, we
> + * can signal a potential deadlock, in which case the scrubber can jump
> + * out to the top level, lock all the AGs in order, and retry the scrub.
> + *
> + * For file data (directories, extended attributes, symlinks) scrub, we
> + * can simply lock the inode and walk the data.  For btree data
> + * (directories and attributes) we follow the same btree-scrubbing
> + * strategy outlined previously to check the records.
> + *
> + * We use a bit of trickery with transactions to avoid buffer deadlocks
> + * if there is a cycle in the metadata.  The basic problem is that
> + * travelling down a btree involves locking the current buffer at each
> + * tree level.  If a pointer should somehow point back to a buffer that
> + * we've already examined, we will deadlock due to the second buffer
> + * locking attempt.  Note however that grabbing a buffer in transaction
> + * context links the locked buffer to the transaction.  If we try to
> + * re-grab the buffer in the context of the same transaction, we avoid
> + * the second lock attempt and continue.  Between the verifier and the
> + * scrubber, something will notice that something is amiss and report
> + * the corruption.  Therefore, each scrubber will allocate an empty
> + * transaction, attach buffers to it, and cancel the transaction at the
> + * end of the scrub run.  Cancelling a non-dirty transaction simply
> + * unlocks the buffers.
> + *
> + * Scrub communicates two kinds of information to userspace.  The first
> + * is the error code (errno), which reports operational errors in
> + * performing the scrub.  The second is the set of output flags in
> + * sm_flags: CORRUPT if the data structure itself is corrupt, PREEN if
> + * the metadata is correct but otherwise suboptimal, XFAIL and XCORRUPT
> + * for cross-referencing problems, INCOMPLETE if the scan did not
> + * finish, and WARNING for oddities that are not corruption.
> + */
> +
> +struct xfs_scrub_meta_fns {
> +	int		(*setup)(struct xfs_scrub_context *,
> +				 struct xfs_inode *);
> +	int		(*scrub)(struct xfs_scrub_context *);
> +	bool		(*has)(struct xfs_sb *);
> +};
> +
> +/* Check for operational errors. */
> +bool
> +xfs_scrub_op_ok(
> +	struct xfs_scrub_context	*sc,
> +	xfs_agnumber_t			agno,
> +	xfs_agblock_t			bno,
> +	const char			*type,
> +	int				*error,
> +	const char			*func,
> +	int				line)
> +{
> +	struct xfs_mount		*mp = sc->mp;
> +
> +	switch (*error) {
> +	case 0:
> +		return true;
> +	case -EDEADLOCK:
> +		/* Used to restart an op with deadlock avoidance. */
> +		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
> +		break;
> +	case -EFSBADCRC:
> +	case -EFSCORRUPTED:
> +		/* Note the badness but don't abort. */
> +		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
> +		*error = 0;
> +		/* fall through */
> +	default:
> +		trace_xfs_scrub_op_error(mp, agno, bno, type, *error, func,
> +				line);
> +		break;
> +	}
> +	return false;
> +}
> +
> +/* Check for operational errors for a file offset. */
> +bool
> +xfs_scrub_file_op_ok(
> +	struct xfs_scrub_context	*sc,
> +	int				whichfork,
> +	xfs_fileoff_t			offset,
> +	const char			*type,
> +	int				*error,
> +	const char			*func,
> +	int				line)
> +{
> +	switch (*error) {
> +	case 0:
> +		return true;
> +	case -EDEADLOCK:
> +		/* Used to restart an op with deadlock avoidance. */
> +		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
> +		break;
> +	case -EFSBADCRC:
> +	case -EFSCORRUPTED:
> +		/* Note the badness but don't abort. */
> +		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
> +		*error = 0;
> +		/* fall through */
> +	default:
> +		trace_xfs_scrub_file_op_error(sc->ip, whichfork, offset, type,
> +				*error, func, line);
> +		break;
> +	}
> +	return false;
> +}
> +
> +/* Check for metadata block optimization possibilities. */
> +bool
> +xfs_scrub_block_preen(
> +	struct xfs_scrub_context	*sc,
> +	struct xfs_buf			*bp,
> +	const char			*type,
> +	bool				fs_ok,
> +	const char			*check,
> +	const char			*func,
> +	int				line)
> +{
> +	struct xfs_mount		*mp = sc->mp;
> +	xfs_fsblock_t			fsbno;
> +	xfs_agnumber_t			agno;
> +	xfs_agblock_t			bno;
> +
> +	if (fs_ok)
> +		return fs_ok;
> +
> +	fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
> +	agno = XFS_FSB_TO_AGNO(mp, fsbno);
> +	bno = XFS_FSB_TO_AGBNO(mp, fsbno);
> +
> +	sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
> +	trace_xfs_scrub_block_preen(mp, agno, bno, type, check, func, line);
> +	return fs_ok;
> +}
> +
> +/* Check for metadata block corruption. */
> +bool
> +xfs_scrub_block_ok(
> +	struct xfs_scrub_context	*sc,
> +	struct xfs_buf			*bp,
> +	const char			*type,
> +	bool				fs_ok,
> +	const char			*check,
> +	const char			*func,
> +	int				line)
> +{
> +	struct xfs_mount		*mp = sc->mp;
> +	xfs_fsblock_t			fsbno;
> +	xfs_agnumber_t			agno;
> +	xfs_agblock_t			bno;
> +
> +	if (fs_ok)
> +		return fs_ok;
> +
> +	fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
> +	agno = XFS_FSB_TO_AGNO(mp, fsbno);
> +	bno = XFS_FSB_TO_AGBNO(mp, fsbno);
> +
> +	sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
> +	trace_xfs_scrub_block_error(mp, agno, bno, type, check, func, line);
> +	return fs_ok;
> +}
> +
> +/* Check for inode metadata corruption. */
> +bool
> +xfs_scrub_ino_ok(
> +	struct xfs_scrub_context	*sc,
> +	xfs_ino_t			ino,
> +	struct xfs_buf			*bp,
> +	const char			*type,
> +	bool				fs_ok,
> +	const char			*check,
> +	const char			*func,
> +	int				line)
> +{
> +	struct xfs_inode		*ip = sc->ip;
> +	struct xfs_mount		*mp = sc->mp;
> +	xfs_fsblock_t			fsbno;
> +	xfs_agnumber_t			agno;
> +	xfs_agblock_t			bno;
> +
> +	if (fs_ok)
> +		return fs_ok;
> +
> +	if (bp) {
> +		fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
> +		agno = XFS_FSB_TO_AGNO(mp, fsbno);
> +		bno = XFS_FSB_TO_AGBNO(mp, fsbno);
> +	} else {
> +		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
> +		bno = XFS_INO_TO_AGINO(mp, ip->i_ino);
> +	}
> +
> +	sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
> +	trace_xfs_scrub_ino_error(mp, ino, agno, bno, type, check, func, line);
> +	return fs_ok;
> +}
> +
> +/* Check for inode metadata optimization possibilities. */
> +bool
> +xfs_scrub_ino_preen(
> +	struct xfs_scrub_context	*sc,
> +	struct xfs_buf			*bp,
> +	const char			*type,
> +	bool				fs_ok,
> +	const char			*check,
> +	const char			*func,
> +	int				line)
> +{
> +	struct xfs_inode		*ip = sc->ip;
> +	struct xfs_mount		*mp = sc->mp;
> +	xfs_fsblock_t			fsbno;
> +	xfs_agnumber_t			agno;
> +	xfs_agblock_t			bno;
> +
> +	if (fs_ok)
> +		return fs_ok;
> +
> +	if (bp) {
> +		fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
> +		agno = XFS_FSB_TO_AGNO(mp, fsbno);
> +		bno = XFS_FSB_TO_AGBNO(mp, fsbno);
> +	} else {
> +		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
> +		bno = XFS_INO_TO_AGINO(mp, ip->i_ino);
> +	}
> +
> +	sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
> +	trace_xfs_scrub_ino_preen(mp, ip->i_ino, agno, bno, type, check,
> +			func, line);
> +	return fs_ok;
> +}
> +
> +/* Check for file data block corruption. */
> +bool
> +xfs_scrub_data_ok(
> +	struct xfs_scrub_context	*sc,
> +	int				whichfork,
> +	xfs_fileoff_t			offset,
> +	const char			*type,
> +	bool				fs_ok,
> +	const char			*check,
> +	const char			*func,
> +	int				line)
> +{
> +	if (fs_ok)
> +		return fs_ok;
> +
> +	sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
> +	trace_xfs_scrub_data_error(sc->ip, whichfork, offset, type, check,
> +			func, line);
> +	return fs_ok;
> +}
> +
> +/* Check for file data block non-corruption problems. */
> +bool
> +xfs_scrub_data_warn_ok(
> +	struct xfs_scrub_context	*sc,
> +	int				whichfork,
> +	xfs_fileoff_t			offset,
> +	const char			*type,
> +	bool				fs_ok,
> +	const char			*check,
> +	const char			*func,
> +	int				line)
> +{
> +	if (fs_ok)
> +		return fs_ok;
> +
> +	sc->sm->sm_flags |= XFS_SCRUB_FLAG_WARNING;
> +	trace_xfs_scrub_data_warning(sc->ip, whichfork, offset, type, check,
> +			func, line);
> +	return fs_ok;
> +}
> +
> +/* Signal an incomplete scrub. */
> +bool
> +xfs_scrub_incomplete(
> +	struct xfs_scrub_context	*sc,
> +	const char			*type,
> +	bool				fs_ok,
> +	const char			*check,
> +	const char			*func,
> +	int				line)
> +{
> +	if (fs_ok)
> +		return fs_ok;
> +
> +	sc->sm->sm_flags |= XFS_SCRUB_FLAG_INCOMPLETE;
> +	trace_xfs_scrub_incomplete(sc->mp, type, check, func, line);
> +	return fs_ok;
> +}
> +
> +/* Dummy scrubber */
> +
> +int
> +xfs_scrub_dummy(
> +	struct xfs_scrub_context	*sc)
> +{
> +	if (sc->sm->sm_ino || sc->sm->sm_agno)
> +		return -EINVAL;
> +	if (sc->sm->sm_gen & XFS_SCRUB_FLAG_CORRUPT)
> +		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
> +	if (sc->sm->sm_gen & XFS_SCRUB_FLAG_PREEN)
> +		sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
> +	if (sc->sm->sm_gen & XFS_SCRUB_FLAG_XFAIL)
> +		sc->sm->sm_flags |= XFS_SCRUB_FLAG_XFAIL;
> +	if (sc->sm->sm_gen & XFS_SCRUB_FLAG_XCORRUPT)
> +		sc->sm->sm_flags |= XFS_SCRUB_FLAG_XCORRUPT;
> +	if (sc->sm->sm_gen & ~XFS_SCRUB_FLAGS_OUT)
> +		return -ENOENT;
> +
> +	return 0;
> +}
> +
> +/* Per-scrubber setup functions */
> +
> +/* Set us up with a transaction and an empty context. */
> +int
> +xfs_scrub_setup_fs(
> +	struct xfs_scrub_context	*sc,
> +	struct xfs_inode		*ip)
> +{
> +	return xfs_scrub_trans_alloc(sc->sm, sc->mp,
> +			&M_RES(sc->mp)->tr_itruncate, 0, 0, 0, &sc->tp);
> +}
> +
> +/* Scrub setup and teardown */
> +
> +/* Free all the resources and finish the transactions. */
> +STATIC int
> +xfs_scrub_teardown(
> +	struct xfs_scrub_context	*sc,
> +	int				error)
> +{
> +	if (sc->tp) {
> +		xfs_trans_cancel(sc->tp);
> +		sc->tp = NULL;
> +	}
> +	return error;
> +}
> +
> +/* Perform common scrub context initialization. */
> +STATIC int
> +xfs_scrub_setup(
> +	struct xfs_inode		*ip,
> +	struct xfs_scrub_context	*sc,
> +	const struct xfs_scrub_meta_fns	*fns,
> +	struct xfs_scrub_metadata	*sm,
> +	bool				try_harder)
> +{
> +	memset(sc, 0, sizeof(*sc));
> +	sc->mp = ip->i_mount;
> +	sc->sm = sm;
> +	sc->fns = fns;
> +	sc->try_harder = try_harder;
> +
> +	return sc->fns->setup(sc, ip);
> +}
> +
> +/* Scrubbing dispatch. */
> +
> +static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
> +	{ /* dummy verifier */
> +		.setup	= xfs_scrub_setup_fs,
> +		.scrub	= xfs_scrub_dummy,
> +	},
> +};
> +
> +/* Dispatch metadata scrubbing. */
> +int
> +xfs_scrub_metadata(
> +	struct xfs_inode		*ip,
> +	struct xfs_scrub_metadata	*sm)
> +{
> +	struct xfs_scrub_context	sc;
> +	struct xfs_mount		*mp = ip->i_mount;
> +	const struct xfs_scrub_meta_fns	*fns;
> +	bool				try_harder = false;
> +	int				error = 0;
> +
> +	trace_xfs_scrub(ip, sm, error);
> +
> +	/* Forbidden if we are shut down or mounted norecovery. */
> +	error = -ESHUTDOWN;
> +	if (XFS_FORCED_SHUTDOWN(mp))
> +		goto out;
> +	error = -ENOTRECOVERABLE;
> +	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
> +		goto out;
> +
> +	/* Check our inputs. */
> +	error = -EINVAL;
> +	sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
> +	if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
> +		goto out;
> +	if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
> +		goto out;
> +
> +	/* Do we know about this type of metadata? */
> +	error = -ENOENT;
> +	if (sm->sm_type > XFS_SCRUB_TYPE_MAX)
> +		goto out;
> +	fns = &meta_scrub_fns[sm->sm_type];
> +	if (fns->scrub == NULL)
> +		goto out;
> +
> +	/* Does this fs even support this type of metadata? */
> +	if (fns->has && !fns->has(&mp->m_sb))
> +		goto out;
> +
> +	/* We don't know how to repair anything yet. */
> +	error = -EOPNOTSUPP;
> +	if (sm->sm_flags & XFS_SCRUB_FLAG_REPAIR)
> +		goto out;
> +
> +	/* This isn't a stable feature.  Use with care. */
> +	{
> +		static bool warned;
> +
> +		if (!warned)
> +			xfs_alert(mp,
> +	"EXPERIMENTAL online scrub feature in use. Use at your own risk!");
> +		warned = true;
> +	}
> +
> +retry_op:
> +	/* Set up for the operation. */
> +	error = xfs_scrub_setup(ip, &sc, fns, sm, try_harder);
> +	if (error)
> +		goto out_teardown;
> +
> +	/* Scrub for errors. */
> +	error = fns->scrub(&sc);
> +	if (!try_harder && error == -EDEADLOCK) {
> +		/*
> +		 * Scrubbers return -EDEADLOCK to mean 'try harder'.
> +		 * Tear down everything we hold, then set up again with
> +		 * preparation for worst-case scenarios.
> +		 */
> +		error = xfs_scrub_teardown(&sc, 0);
> +		if (error)
> +			goto out;
> +		try_harder = true;
> +		goto retry_op;
> +	} else if (error)
> +		goto out_teardown;
> +
> +	if (xfs_scrub_found_corruption(sm))
> +		xfs_alert_ratelimited(mp, "Corruption detected during scrub.");
> +
> +out_teardown:
> +	error = xfs_scrub_teardown(&sc, error);
> +out:
> +	trace_xfs_scrub_done(ip, sm, error);
> +	return error;
> +}
> diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
> new file mode 100644
> index 0000000..4f3113a
> --- /dev/null
> +++ b/fs/xfs/scrub/common.h
> @@ -0,0 +1,179 @@
> +/*
> + * Copyright (C) 2017 Oracle.  All Rights Reserved.
> + *
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write the Free Software Foundation,
> + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
> + */
> +#ifndef __XFS_REPAIR_COMMON_H__
> +#define __XFS_REPAIR_COMMON_H__
> +
> +/* Did we find something broken? */
> +static inline bool xfs_scrub_found_corruption(struct xfs_scrub_metadata *sm)
> +{
> +	return sm->sm_flags & (XFS_SCRUB_FLAG_CORRUPT |
> +			       XFS_SCRUB_FLAG_XCORRUPT);
> +}
> +
> +struct xfs_scrub_context {
> +	/* General scrub state. */
> +	struct xfs_mount		*mp;
> +	struct xfs_scrub_metadata	*sm;
> +	const struct xfs_scrub_meta_fns	*fns;
> +	struct xfs_trans		*tp;
> +	struct xfs_inode		*ip;
> +	bool				try_harder;
> +};
> +
> +/* Should we end the scrub early? */
> +static inline bool
> +xfs_scrub_should_terminate(
> +	int		*error)
> +{
> +	if (fatal_signal_pending(current)) {
> +		if (*error == 0)
> +			*error = -EAGAIN;
> +		return true;
> +	}
> +	return false;
> +}
> +
> +/*
> + * Grab a transaction.  If we're going to repair something, we need to
> + * ensure there's enough reservation to make all the changes.  If not,
> + * we can use an empty transaction.
> + */
> +static inline int
> +xfs_scrub_trans_alloc(
> +	struct xfs_scrub_metadata	*sm,
> +	struct xfs_mount		*mp,
> +	struct xfs_trans_res		*resp,
> +	uint				blocks,
> +	uint				rtextents,
> +	uint				flags,
> +	struct xfs_trans		**tpp)
> +{
> +	return xfs_trans_alloc_empty(mp, tpp);
> +}
> +
> +/* Check for operational errors. */
> +bool xfs_scrub_op_ok(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
> +		     xfs_agblock_t bno, const char *type, int *error,
> +		     const char	*func, int line);
> +#define XFS_SCRUB_OP_ERROR_GOTO(sc, agno, bno, type, error, label) \
> +	do { \
> +		if (!xfs_scrub_op_ok((sc), (agno), (bno), (type), \
> +				(error), __func__, __LINE__)) \
> +			goto label; \
> +	} while (0)
> +
> +/* Check for operational errors for a file offset. */
> +bool xfs_scrub_file_op_ok(struct xfs_scrub_context *sc, int whichfork,
> +			  xfs_fileoff_t offset, const char *type,
> +			  int *error, const char *func, int line);
> +#define XFS_SCRUB_FILE_OP_ERROR_GOTO(sc, which, off, type, error, label) \
> +	do { \
> +		if (!xfs_scrub_file_op_ok((sc), (which), (off), (type), \
> +				(error), __func__, __LINE__)) \
> +			goto label; \
> +	} while (0)
> +
> +/* Check for metadata block optimization possibilities. */
> +bool xfs_scrub_block_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp,
> +			   const char *type, bool fs_ok, const char *check,
> +			   const char *func, int line);
> +#define XFS_SCRUB_PREEN(sc, bp, type, fs_ok) \
> +	xfs_scrub_block_preen((sc), (bp), (type), (fs_ok), #fs_ok, \
> +			__func__, __LINE__)
> +
> +/* Check for inode metadata optimization possibilities. */
> +bool xfs_scrub_ino_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp,
> +		      const char *type, bool fs_ok, const char *check,
> +		      const char *func, int line);
> +#define XFS_SCRUB_INO_PREEN(sc, bp, type, fs_ok) \
> +	xfs_scrub_ino_preen((sc), (bp), (type), (fs_ok), #fs_ok, \
> +			__func__, __LINE__)
> +
> +/* Check for metadata block corruption. */
> +bool xfs_scrub_block_ok(struct xfs_scrub_context *sc, struct xfs_buf *bp,
> +			const char *type, bool fs_ok, const char *check,
> +			const char *func, int line);
> +#define XFS_SCRUB_CHECK(sc, bp, type, fs_ok) \
> +	xfs_scrub_block_ok((sc), (bp), (type), (fs_ok), #fs_ok, \
> +			__func__, __LINE__)
> +#define XFS_SCRUB_GOTO(sc, bp, type, fs_ok, label) \
> +	do { \
> +		if (!xfs_scrub_block_ok((sc), (bp), (type), (fs_ok), \
> +				#fs_ok, __func__, __LINE__)) \
> +			goto label; \
> +	} while (0)
> +
> +/* Check for inode metadata corruption. */
> +bool xfs_scrub_ino_ok(struct xfs_scrub_context *sc, xfs_ino_t ino,
> +		      struct xfs_buf *bp, const char *type, bool fs_ok,
> +		      const char *check, const char *func, int line);
> +#define XFS_SCRUB_INO_CHECK(sc, ino, bp, type, fs_ok) \
> +	xfs_scrub_ino_ok((sc), (ino), (bp), (type), (fs_ok), #fs_ok, \
> +			__func__, __LINE__)
> +#define XFS_SCRUB_INO_GOTO(sc, ino, bp, type, fs_ok, label) \
> +	do { \
> +		if (!xfs_scrub_ino_ok((sc), (ino), (bp), (type), (fs_ok), \
> +				#fs_ok, __func__, __LINE__)) \
> +			goto label; \
> +	} while (0)
> +
> +/* Check for file data block corruption. */
> +bool xfs_scrub_data_ok(struct xfs_scrub_context *sc, int whichfork,
> +		       xfs_fileoff_t offset, const char *type, bool fs_ok,
> +		       const char *check, const char *func, int line);
> +#define XFS_SCRUB_DATA_CHECK(sc, whichfork, offset, type, fs_ok) \
> +	xfs_scrub_data_ok((sc), (whichfork), (offset), (type), (fs_ok), \
> +			#fs_ok, __func__, __LINE__)
> +#define XFS_SCRUB_DATA_GOTO(sc, whichfork, offset, type, fs_ok, label) \
> +	do { \
> +		if (!xfs_scrub_data_ok((sc), (whichfork), (offset), \
> +				(type), (fs_ok), #fs_ok, __func__, __LINE__)) \
> +			goto label; \
> +	} while (0)
> +
> +/* Check for file data block non-corruption problems. */
> +bool xfs_scrub_data_warn_ok(struct xfs_scrub_context *sc, int whichfork,
> +			    xfs_fileoff_t offset, const char *type, bool fs_ok,
> +			    const char *check, const char *func, int line);
> +#define XFS_SCRUB_DATA_WARN(sc, whichfork, offset, type, fs_ok) \
> +	xfs_scrub_data_warn_ok((sc), (whichfork), (offset), (type), (fs_ok), \
> +			#fs_ok, __func__, __LINE__)
> +
> +/* Signal an incomplete scrub. */
> +bool xfs_scrub_incomplete(struct xfs_scrub_context *sc, const char *type,
> +			  bool fs_ok, const char *check, const char *func,
> +			  int line);
> +#define XFS_SCRUB_INCOMPLETE(sc, type, fs_ok) \
> +	xfs_scrub_incomplete((sc), (type), (fs_ok), \
> +			#fs_ok, __func__, __LINE__)
> +
> +/* Setup functions */
> +
> +#define SETUP_FN(name) int name(struct xfs_scrub_context *sc, struct xfs_inode *ip)
> +SETUP_FN(xfs_scrub_setup_fs);
> +#undef SETUP_FN
> +
> +/* Metadata scrubbers */
> +
> +#define SCRUB_FN(name) int name(struct xfs_scrub_context *sc)
> +SCRUB_FN(xfs_scrub_dummy);
> +#undef SCRUB_FN
> +
> +#endif	/* __XFS_REPAIR_COMMON_H__ */
> diff --git a/fs/xfs/scrub/xfs_scrub.h b/fs/xfs/scrub/xfs_scrub.h
> new file mode 100644
> index 0000000..e00e0ea
> --- /dev/null
> +++ b/fs/xfs/scrub/xfs_scrub.h
> @@ -0,0 +1,29 @@
> +/*
> + * Copyright (C) 2017 Oracle.  All Rights Reserved.
> + *
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write the Free Software Foundation,
> + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
> + */
> +#ifndef __XFS_SCRUB_H__
> +#define __XFS_SCRUB_H__
> +
> +#ifndef CONFIG_XFS_ONLINE_SCRUB
> +# define xfs_scrub_metadata(ip, sm)	(-ENOTTY)
> +#else
> +int xfs_scrub_metadata(struct xfs_inode *ip, struct xfs_scrub_metadata *sm);
> +#endif /* CONFIG_XFS_ONLINE_SCRUB */
> +
> +#endif	/* __XFS_SCRUB_H__ */
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index cc00260..87b3874 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -44,6 +44,7 @@
>  #include "xfs_btree.h"
>  #include <linux/fsmap.h>
>  #include "xfs_fsmap.h"
> +#include "scrub/xfs_scrub.h"
>
>  #include <linux/capability.h>
>  #include <linux/cred.h>
> @@ -1689,6 +1690,30 @@ xfs_ioc_getfsmap(
>  	return 0;
>  }
>
> +STATIC int
> +xfs_ioc_scrub_metadata(
> +	struct xfs_inode		*ip,
> +	void				__user *arg)
> +{
> +	struct xfs_scrub_metadata	scrub;
> +	int				error;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	if (copy_from_user(&scrub, arg, sizeof(scrub)))
> +		return -EFAULT;
> +
> +	error = xfs_scrub_metadata(ip, &scrub);
> +	if (error)
> +		return error;
> +
> +	if (copy_to_user(arg, &scrub, sizeof(scrub)))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>  int
>  xfs_ioc_swapext(
>  	xfs_swapext_t	*sxp)
> @@ -1872,6 +1897,9 @@ xfs_file_ioctl(
>  	case FS_IOC_GETFSMAP:
>  		return xfs_ioc_getfsmap(ip, arg);
>
> +	case XFS_IOC_SCRUB_METADATA:
> +		return xfs_ioc_scrub_metadata(ip, arg);
> +
>  	case XFS_IOC_FD_TO_HANDLE:
>  	case XFS_IOC_PATH_TO_HANDLE:
>  	case XFS_IOC_PATH_TO_FSHANDLE: {
> diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
> index e8b4de3..972d4bd 100644
> --- a/fs/xfs/xfs_ioctl32.c
> +++ b/fs/xfs/xfs_ioctl32.c
> @@ -557,6 +557,7 @@ xfs_file_compat_ioctl(
>  	case XFS_IOC_ERROR_CLEARALL:
>  	case FS_IOC_GETFSMAP:
>  	case XFS_IOC_GET_AG_RESBLKS:
> +	case XFS_IOC_SCRUB_METADATA:
>  		return xfs_file_ioctl(filp, cmd, p);
>  #ifndef BROKEN_X86_ALIGNMENT
>  	/* These are handled fine if no alignment issues */
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 2e7e193..d4de29b 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -3312,7 +3312,7 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
>
>  /* scrub */
>  #define XFS_SCRUB_TYPE_DESC \
> -	{ 0, NULL }
> +	{ XFS_SCRUB_TYPE_TEST,		"dummy" }
>  DECLARE_EVENT_CLASS(xfs_scrub_class,
>  	TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
>  		 int error),
> @@ -3330,6 +3330,11 @@ DECLARE_EVENT_CLASS(xfs_scrub_class,
>  	TP_fast_assign(
>  		__entry->dev = ip->i_mount->m_super->s_dev;
>  		__entry->ino = ip->i_ino;
> +		__entry->type = sm->sm_type;
> +		__entry->agno = sm->sm_agno;
> +		__entry->inum = sm->sm_ino;
> +		__entry->gen = sm->sm_gen;
> +		__entry->flags = sm->sm_flags;
>  		__entry->error = error;
>  	),
>  	TP_printk("dev %d:%d ino %llu type %s agno %u inum %llu gen %u flags 0x%x error %d",
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

  reply	other threads:[~2017-07-23 16:37 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-21  4:38 [PATCH v8 00/22] xfs: online scrub support Darrick J. Wong
2017-07-21  4:38 ` [PATCH 01/22] xfs: query the per-AG reservation counters Darrick J. Wong
2017-07-23 16:16   ` Allison Henderson
2017-07-23 22:25   ` Dave Chinner
2017-07-24 19:07     ` Darrick J. Wong
2017-07-21  4:38 ` [PATCH 02/22] xfs: add scrub tracepoints Darrick J. Wong
2017-07-23 16:23   ` Allison Henderson
2017-07-21  4:38 ` [PATCH 03/22] xfs: create an ioctl to scrub AG metadata Darrick J. Wong
2017-07-23 16:37   ` Allison Henderson [this message]
2017-07-23 23:45   ` Dave Chinner
2017-07-24 21:14     ` Darrick J. Wong
2017-07-21  4:38 ` [PATCH 04/22] xfs: generic functions to scrub metadata and btrees Darrick J. Wong
2017-07-23 16:40   ` Allison Henderson
2017-07-24  1:05   ` Dave Chinner
2017-07-24 21:58     ` Darrick J. Wong
2017-07-24 23:15       ` Dave Chinner
2017-07-25  0:39         ` Darrick J. Wong
2017-07-21  4:39 ` [PATCH 05/22] xfs: scrub in-memory metadata buffers Darrick J. Wong
2017-07-23 16:48   ` Allison Henderson
2017-07-24  1:43   ` Dave Chinner
2017-07-24 22:36     ` Darrick J. Wong
2017-07-24 23:38       ` Dave Chinner
2017-07-25  0:14         ` Darrick J. Wong
2017-07-25  3:32           ` Dave Chinner
2017-07-25  5:27             ` Darrick J. Wong
2017-07-21  4:39 ` [PATCH 06/22] xfs: scrub the backup superblocks Darrick J. Wong
2017-07-23 16:50   ` Allison Henderson
2017-07-25  4:05   ` Dave Chinner
2017-07-25  5:42     ` Darrick J. Wong
2017-07-21  4:39 ` [PATCH 07/22] xfs: scrub AGF and AGFL Darrick J. Wong
2017-07-23 16:59   ` Allison Henderson
2017-07-21  4:39 ` [PATCH 08/22] xfs: scrub the AGI Darrick J. Wong
2017-07-23 17:02   ` Allison Henderson
2017-07-21  4:39 ` [PATCH 09/22] xfs: scrub free space btrees Darrick J. Wong
2017-07-23 17:09   ` Allison Henderson
2017-07-21  4:39 ` [PATCH 10/22] xfs: scrub inode btrees Darrick J. Wong
2017-07-23 17:15   ` Allison Henderson
2017-07-21  4:39 ` [PATCH 11/22] xfs: scrub rmap btrees Darrick J. Wong
2017-07-23 17:21   ` Allison Henderson
2017-07-21  4:39 ` [PATCH 12/22] xfs: scrub refcount btrees Darrick J. Wong
2017-07-23 17:25   ` Allison Henderson
2017-07-21  4:39 ` [PATCH 13/22] xfs: scrub inodes Darrick J. Wong
2017-07-23 17:38   ` Allison Henderson
2017-07-24 20:02     ` Darrick J. Wong
2017-07-21  4:40 ` [PATCH 14/22] xfs: scrub inode block mappings Darrick J. Wong
2017-07-23 17:41   ` Allison Henderson
2017-07-24 20:05     ` Darrick J. Wong
2017-07-21  4:40 ` [PATCH 15/22] xfs: scrub directory/attribute btrees Darrick J. Wong
2017-07-23 17:45   ` Allison Henderson
2017-07-21  4:40 ` [PATCH 16/22] xfs: scrub directory metadata Darrick J. Wong
2017-07-23 17:51   ` Allison Henderson
2017-07-21  4:40 ` [PATCH 17/22] xfs: scrub directory freespace Darrick J. Wong
2017-07-23 17:55   ` Allison Henderson
2017-07-21  4:40 ` [PATCH 18/22] xfs: scrub extended attributes Darrick J. Wong
2017-07-23 17:57   ` Allison Henderson
2017-07-21  4:40 ` [PATCH 19/22] xfs: scrub symbolic links Darrick J. Wong
2017-07-23 17:59   ` Allison Henderson
2017-07-21  4:40 ` [PATCH 20/22] xfs: scrub parent pointers Darrick J. Wong
2017-07-23 18:03   ` Allison Henderson
2017-07-21  4:40 ` [PATCH 21/22] xfs: scrub realtime bitmap/summary Darrick J. Wong
2017-07-23 18:05   ` Allison Henderson
2017-07-21  4:40 ` [PATCH 22/22] xfs: scrub quota information Darrick J. Wong
2017-07-23 18:07   ` Allison Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=41bd3e4e-1bc0-b5cc-1f8e-6ac9ca7f3b08@oracle.com \
    --to=allison.henderson@oracle.com \
    --cc=darrick.wong@oracle.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).