From: Allison Henderson <allison.henderson@oracle.com>
To: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: linux-xfs@vger.kernel.org
Subject: Re: [PATCH 03/22] xfs: create an ioctl to scrub AG metadata
Date: Sun, 23 Jul 2017 09:37:56 -0700 [thread overview]
Message-ID: <41bd3e4e-1bc0-b5cc-1f8e-6ac9ca7f3b08@oracle.com> (raw)
In-Reply-To: <150061192762.14732.9274339959944172701.stgit@magnolia>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
On 7/20/2017 9:38 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Create an ioctl that can be used to scrub internal filesystem metadata.
> The new ioctl takes the metadata type, an (optional) AG number, an
> (optional) inode number and generation, and a flags argument. This will
> be used by the upcoming XFS online scrub tool.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
> fs/xfs/Kconfig | 17 +
> fs/xfs/Makefile | 7 +
> fs/xfs/libxfs/xfs_fs.h | 41 ++++
> fs/xfs/scrub/common.c | 533 ++++++++++++++++++++++++++++++++++++++++++++++
> fs/xfs/scrub/common.h | 179 +++++++++++++++
> fs/xfs/scrub/xfs_scrub.h | 29 +++
> fs/xfs/xfs_ioctl.c | 28 ++
> fs/xfs/xfs_ioctl32.c | 1
> fs/xfs/xfs_trace.h | 7 +
> 9 files changed, 841 insertions(+), 1 deletion(-)
> create mode 100644 fs/xfs/scrub/common.c
> create mode 100644 fs/xfs/scrub/common.h
> create mode 100644 fs/xfs/scrub/xfs_scrub.h
>
>
> diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
> index 1b98cfa..f42fcf1 100644
> --- a/fs/xfs/Kconfig
> +++ b/fs/xfs/Kconfig
> @@ -71,6 +71,23 @@ config XFS_RT
>
> If unsure, say N.
>
> +config XFS_ONLINE_SCRUB
> + bool "XFS online metadata check support"
> + default n
> + depends on XFS_FS
> + help
> + If you say Y here you will be able to check metadata on a
> + mounted XFS filesystem. This feature is intended to reduce
> + filesystem downtime by supplementing xfs_repair. The key
> + advantage here is to look for problems proactively so that
> + they can be dealt with in a controlled manner.
> +
> + This feature is considered EXPERIMENTAL. Use with caution!
> +
> + See the xfs_scrub man page in section 8 for additional information.
> +
> + If unsure, say N.
> +
> config XFS_WARN
> bool "XFS Verbose Warnings"
> depends on XFS_FS && !XFS_DEBUG
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 5b959ee..c4fdaa2 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -136,3 +136,10 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
> xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
> xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
> xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o
> +
> +# online scrub/repair
> +ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
> +xfs-y += $(addprefix scrub/, \
> + common.o \
> + )
> +endif
> diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> index 5dedab9..aeccc99 100644
> --- a/fs/xfs/libxfs/xfs_fs.h
> +++ b/fs/xfs/libxfs/xfs_fs.h
> @@ -468,6 +468,46 @@ typedef struct xfs_swapext
> #define XFS_FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
> #define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
>
> +/* metadata scrubbing */
> +struct xfs_scrub_metadata {
> + __u32 sm_type; /* What to check? */
> + __u32 sm_flags; /* flags; see below. */
> + __u64 sm_ino; /* inode number. */
> + __u32 sm_gen; /* inode generation. */
> + __u32 sm_agno; /* ag number. */
> + __u64 sm_reserved[5]; /* pad to 64 bytes */
> +};
> +
> +/*
> + * Metadata types and flags for scrub operation.
> + */
> +#define XFS_SCRUB_TYPE_TEST 0 /* dummy to test ioctl */
> +#define XFS_SCRUB_TYPE_MAX 0
> +
> +/* i: repair this metadata */
> +#define XFS_SCRUB_FLAG_REPAIR (1 << 0)
> +/* o: metadata object needs repair */
> +#define XFS_SCRUB_FLAG_CORRUPT (1 << 1)
> +/* o: metadata object could be optimized */
> +#define XFS_SCRUB_FLAG_PREEN (1 << 2)
> +/* o: cross-referencing failed */
> +#define XFS_SCRUB_FLAG_XFAIL (1 << 3)
> +/* o: metadata object disagrees with cross-referenced metadata */
> +#define XFS_SCRUB_FLAG_XCORRUPT (1 << 4)
> +/* o: scan was not complete */
> +#define XFS_SCRUB_FLAG_INCOMPLETE (1 << 5)
> +/* o: metadata object looked funny but isn't corrupt */
> +#define XFS_SCRUB_FLAG_WARNING (1 << 6)
> +
> +#define XFS_SCRUB_FLAGS_IN (XFS_SCRUB_FLAG_REPAIR)
> +#define XFS_SCRUB_FLAGS_OUT (XFS_SCRUB_FLAG_CORRUPT | \
> + XFS_SCRUB_FLAG_PREEN | \
> + XFS_SCRUB_FLAG_XFAIL | \
> + XFS_SCRUB_FLAG_XCORRUPT | \
> + XFS_SCRUB_FLAG_INCOMPLETE | \
> + XFS_SCRUB_FLAG_WARNING)
> +#define XFS_SCRUB_FLAGS_ALL (XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
> +
> /*
> * AG reserved block counters
> */
> @@ -520,6 +560,7 @@ struct xfs_fsop_ag_resblks {
> #define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
> #define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
> /* XFS_IOC_GETFSMAP ------ hoisted 59 */
> +#define XFS_IOC_SCRUB_METADATA _IOWR('X', 60, struct xfs_scrub_metadata)
>
> /*
> * ioctl commands that replace IRIX syssgi()'s
> diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
> new file mode 100644
> index 0000000..6931793
> --- /dev/null
> +++ b/fs/xfs/scrub/common.c
> @@ -0,0 +1,533 @@
> +/*
> + * Copyright (C) 2017 Oracle. All Rights Reserved.
> + *
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write the Free Software Foundation,
> + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
> + */
> +#include "xfs.h"
> +#include "xfs_fs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_defer.h"
> +#include "xfs_btree.h"
> +#include "xfs_bit.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans.h"
> +#include "xfs_trace.h"
> +#include "xfs_sb.h"
> +#include "xfs_inode.h"
> +#include "xfs_alloc.h"
> +#include "xfs_alloc_btree.h"
> +#include "xfs_bmap.h"
> +#include "xfs_bmap_btree.h"
> +#include "xfs_ialloc.h"
> +#include "xfs_ialloc_btree.h"
> +#include "xfs_refcount.h"
> +#include "xfs_refcount_btree.h"
> +#include "xfs_rmap.h"
> +#include "xfs_rmap_btree.h"
> +#include "scrub/xfs_scrub.h"
> +#include "scrub/common.h"
> +
> +/*
> + * Online Scrub and Repair
> + *
> + * Traditionally, XFS (the kernel driver) did not know how to check or
> + * repair on-disk data structures. That task was left to the xfs_check
> + * and xfs_repair tools, both of which require taking the filesystem
> + * offline for a thorough but time consuming examination. Online
> + * scrub & repair, on the other hand, enables us to check the metadata
> + * for obvious errors while carefully stepping around the filesystem's
> + * ongoing operations, locking rules, etc.
> + *
> + * Given that most XFS metadata consist of records stored in a btree,
> + * most of the checking functions iterate the btree blocks themselves
> + * looking for irregularities. When a record block is encountered, each
> + * record can be checked for obviously bad values. Record values can
> + * also be cross-referenced against other btrees to look for potential
> + * misunderstandings between pieces of metadata.
> + *
> + * It is expected that the checkers responsible for per-AG metadata
> + * structures will lock the AG headers (AGI, AGF, AGFL), iterate the
> + * metadata structure, and perform any relevant cross-referencing before
> + * unlocking the AG and returning the results to userspace. These
> + * scrubbers must not keep an AG locked for too long to avoid tying up
> + * the block and inode allocators.
> + *
> + * Block maps and b-trees rooted in an inode present a special challenge
> + * because they can involve extents from any AG. The general scrubber
> + * structure of lock -> check -> xref -> unlock still holds, but AG
> + * locking order rules /must/ be obeyed to avoid deadlocks. The
> + * ordering rule, of course, is that we must lock in increasing AG
> + * order. Helper functions are provided to track which AG headers we've
> + * already locked. If we detect an imminent locking order violation, we
> + * can signal a potential deadlock, in which case the scrubber can jump
> + * out to the top level, lock all the AGs in order, and retry the scrub.
> + *
> + * For file data (directories, extended attributes, symlinks) scrub, we
> + * can simply lock the inode and walk the data. For btree data
> + * (directories and attributes) we follow the same btree-scrubbing
> + * strategy outlined previously to check the records.
> + *
> + * We use a bit of trickery with transactions to avoid buffer deadlocks
> + * if there is a cycle in the metadata. The basic problem is that
> + * travelling down a btree involves locking the current buffer at each
> + * tree level. If a pointer should somehow point back to a buffer that
> + * we've already examined, we will deadlock due to the second buffer
> + * locking attempt. Note however that grabbing a buffer in transaction
> + * context links the locked buffer to the transaction. If we try to
> + * re-grab the buffer in the context of the same transaction, we avoid
> + * the second lock attempt and continue. Between the verifier and the
> + * scrubber, something will notice that something is amiss and report
> + * the corruption. Therefore, each scrubber will allocate an empty
> + * transaction, attach buffers to it, and cancel the transaction at the
> + * end of the scrub run. Cancelling a non-dirty transaction simply
> + * unlocks the buffers.
> + *
> + * Scrub communicates two kinds of data to userspace. The first is the
> + * error code (errno), which reports operational errors in performing
> + * the scrub. The second is the set of output flags (XFS_SCRUB_FLAGS_OUT)
> + * returned in sm_flags. For example, if the data structure itself is
> + * corrupt, the CORRUPT flag will be set; if the metadata is correct but
> + * otherwise suboptimal, the PREEN flag will be set. The remaining flags
> + * are described alongside their definitions in xfs_fs.h.
> + */
> +
> +struct xfs_scrub_meta_fns {
> + int (*setup)(struct xfs_scrub_context *,
> + struct xfs_inode *);
> + int (*scrub)(struct xfs_scrub_context *);
> + bool (*has)(struct xfs_sb *);
> +};
> +
> +/* Check for operational errors. */
> +bool
> +xfs_scrub_op_ok(
> + struct xfs_scrub_context *sc,
> + xfs_agnumber_t agno,
> + xfs_agblock_t bno,
> + const char *type,
> + int *error,
> + const char *func,
> + int line)
> +{
> + struct xfs_mount *mp = sc->mp;
> +
> + switch (*error) {
> + case 0:
> + return true;
> + case -EDEADLOCK:
> + /* Used to restart an op with deadlock avoidance. */
> + trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
> + break;
> + case -EFSBADCRC:
> + case -EFSCORRUPTED:
> + /* Note the badness but don't abort. */
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
> + *error = 0;
> + /* fall through */
> + default:
> + trace_xfs_scrub_op_error(mp, agno, bno, type, *error, func,
> + line);
> + break;
> + }
> + return false;
> +}
> +
> +/* Check for operational errors for a file offset. */
> +bool
> +xfs_scrub_file_op_ok(
> + struct xfs_scrub_context *sc,
> + int whichfork,
> + xfs_fileoff_t offset,
> + const char *type,
> + int *error,
> + const char *func,
> + int line)
> +{
> + switch (*error) {
> + case 0:
> + return true;
> + case -EDEADLOCK:
> + /* Used to restart an op with deadlock avoidance. */
> + trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
> + break;
> + case -EFSBADCRC:
> + case -EFSCORRUPTED:
> + /* Note the badness but don't abort. */
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
> + *error = 0;
> + /* fall through */
> + default:
> + trace_xfs_scrub_file_op_error(sc->ip, whichfork, offset, type,
> + *error, func, line);
> + break;
> + }
> + return false;
> +}
> +
> +/* Check for metadata block optimization possibilities. */
> +bool
> +xfs_scrub_block_preen(
> + struct xfs_scrub_context *sc,
> + struct xfs_buf *bp,
> + const char *type,
> + bool fs_ok,
> + const char *check,
> + const char *func,
> + int line)
> +{
> + struct xfs_mount *mp = sc->mp;
> + xfs_fsblock_t fsbno;
> + xfs_agnumber_t agno;
> + xfs_agblock_t bno;
> +
> + if (fs_ok)
> + return fs_ok;
> +
> + fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
> + agno = XFS_FSB_TO_AGNO(mp, fsbno);
> + bno = XFS_FSB_TO_AGBNO(mp, fsbno);
> +
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
> + trace_xfs_scrub_block_preen(mp, agno, bno, type, check, func, line);
> + return fs_ok;
> +}
> +
> +/* Check for metadata block corruption. */
> +bool
> +xfs_scrub_block_ok(
> + struct xfs_scrub_context *sc,
> + struct xfs_buf *bp,
> + const char *type,
> + bool fs_ok,
> + const char *check,
> + const char *func,
> + int line)
> +{
> + struct xfs_mount *mp = sc->mp;
> + xfs_fsblock_t fsbno;
> + xfs_agnumber_t agno;
> + xfs_agblock_t bno;
> +
> + if (fs_ok)
> + return fs_ok;
> +
> + fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
> + agno = XFS_FSB_TO_AGNO(mp, fsbno);
> + bno = XFS_FSB_TO_AGBNO(mp, fsbno);
> +
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
> + trace_xfs_scrub_block_error(mp, agno, bno, type, check, func, line);
> + return fs_ok;
> +}
> +
> +/* Check for inode metadata corruption. */
> +bool
> +xfs_scrub_ino_ok(
> + struct xfs_scrub_context *sc,
> + xfs_ino_t ino,
> + struct xfs_buf *bp,
> + const char *type,
> + bool fs_ok,
> + const char *check,
> + const char *func,
> + int line)
> +{
> + struct xfs_inode *ip = sc->ip;
> + struct xfs_mount *mp = sc->mp;
> + xfs_fsblock_t fsbno;
> + xfs_agnumber_t agno;
> + xfs_agblock_t bno;
> +
> + if (fs_ok)
> + return fs_ok;
> +
> + if (bp) {
> + fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
> + agno = XFS_FSB_TO_AGNO(mp, fsbno);
> + bno = XFS_FSB_TO_AGBNO(mp, fsbno);
> + } else {
> + agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
> + bno = XFS_INO_TO_AGINO(mp, ip->i_ino);
> + }
> +
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
> + trace_xfs_scrub_ino_error(mp, ino, agno, bno, type, check, func, line);
> + return fs_ok;
> +}
> +
> +/* Check for inode metadata optimization possibilities. */
> +bool
> +xfs_scrub_ino_preen(
> + struct xfs_scrub_context *sc,
> + struct xfs_buf *bp,
> + const char *type,
> + bool fs_ok,
> + const char *check,
> + const char *func,
> + int line)
> +{
> + struct xfs_inode *ip = sc->ip;
> + struct xfs_mount *mp = sc->mp;
> + xfs_fsblock_t fsbno;
> + xfs_agnumber_t agno;
> + xfs_agblock_t bno;
> +
> + if (fs_ok)
> + return fs_ok;
> +
> + if (bp) {
> + fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
> + agno = XFS_FSB_TO_AGNO(mp, fsbno);
> + bno = XFS_FSB_TO_AGBNO(mp, fsbno);
> + } else {
> + agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
> + bno = XFS_INO_TO_AGINO(mp, ip->i_ino);
> + }
> +
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
> + trace_xfs_scrub_ino_preen(mp, ip->i_ino, agno, bno, type, check,
> + func, line);
> + return fs_ok;
> +}
> +
> +/* Check for file data block corruption. */
> +bool
> +xfs_scrub_data_ok(
> + struct xfs_scrub_context *sc,
> + int whichfork,
> + xfs_fileoff_t offset,
> + const char *type,
> + bool fs_ok,
> + const char *check,
> + const char *func,
> + int line)
> +{
> + if (fs_ok)
> + return fs_ok;
> +
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
> + trace_xfs_scrub_data_error(sc->ip, whichfork, offset, type, check,
> + func, line);
> + return fs_ok;
> +}
> +
> +/* Check for file data block non-corruption problems. */
> +bool
> +xfs_scrub_data_warn_ok(
> + struct xfs_scrub_context *sc,
> + int whichfork,
> + xfs_fileoff_t offset,
> + const char *type,
> + bool fs_ok,
> + const char *check,
> + const char *func,
> + int line)
> +{
> + if (fs_ok)
> + return fs_ok;
> +
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_WARNING;
> + trace_xfs_scrub_data_warning(sc->ip, whichfork, offset, type, check,
> + func, line);
> + return fs_ok;
> +}
> +
> +/* Signal an incomplete scrub. */
> +bool
> +xfs_scrub_incomplete(
> + struct xfs_scrub_context *sc,
> + const char *type,
> + bool fs_ok,
> + const char *check,
> + const char *func,
> + int line)
> +{
> + if (fs_ok)
> + return fs_ok;
> +
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_INCOMPLETE;
> + trace_xfs_scrub_incomplete(sc->mp, type, check, func, line);
> + return fs_ok;
> +}
> +
> +/* Dummy scrubber */
> +
> +int
> +xfs_scrub_dummy(
> + struct xfs_scrub_context *sc)
> +{
> + if (sc->sm->sm_ino || sc->sm->sm_agno)
> + return -EINVAL;
> + if (sc->sm->sm_gen & XFS_SCRUB_FLAG_CORRUPT)
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
> + if (sc->sm->sm_gen & XFS_SCRUB_FLAG_PREEN)
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
> + if (sc->sm->sm_gen & XFS_SCRUB_FLAG_XFAIL)
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_XFAIL;
> + if (sc->sm->sm_gen & XFS_SCRUB_FLAG_XCORRUPT)
> + sc->sm->sm_flags |= XFS_SCRUB_FLAG_XCORRUPT;
> + if (sc->sm->sm_gen & ~XFS_SCRUB_FLAGS_OUT)
> + return -ENOENT;
> +
> + return 0;
> +}
> +
> +/* Per-scrubber setup functions */
> +
> +/* Set us up with a transaction and an empty context. */
> +int
> +xfs_scrub_setup_fs(
> + struct xfs_scrub_context *sc,
> + struct xfs_inode *ip)
> +{
> + return xfs_scrub_trans_alloc(sc->sm, sc->mp,
> + &M_RES(sc->mp)->tr_itruncate, 0, 0, 0, &sc->tp);
> +}
> +
> +/* Scrub setup and teardown */
> +
> +/* Free all the resources and finish the transactions. */
> +STATIC int
> +xfs_scrub_teardown(
> + struct xfs_scrub_context *sc,
> + int error)
> +{
> + if (sc->tp) {
> + xfs_trans_cancel(sc->tp);
> + sc->tp = NULL;
> + }
> + return error;
> +}
> +
> +/* Perform common scrub context initialization. */
> +STATIC int
> +xfs_scrub_setup(
> + struct xfs_inode *ip,
> + struct xfs_scrub_context *sc,
> + const struct xfs_scrub_meta_fns *fns,
> + struct xfs_scrub_metadata *sm,
> + bool try_harder)
> +{
> + memset(sc, 0, sizeof(*sc));
> + sc->mp = ip->i_mount;
> + sc->sm = sm;
> + sc->fns = fns;
> + sc->try_harder = try_harder;
> +
> + return sc->fns->setup(sc, ip);
> +}
> +
> +/* Scrubbing dispatch. */
> +
> +static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
> + { /* dummy verifier */
> + .setup = xfs_scrub_setup_fs,
> + .scrub = xfs_scrub_dummy,
> + },
> +};
> +
> +/* Dispatch metadata scrubbing. */
> +int
> +xfs_scrub_metadata(
> + struct xfs_inode *ip,
> + struct xfs_scrub_metadata *sm)
> +{
> + struct xfs_scrub_context sc;
> + struct xfs_mount *mp = ip->i_mount;
> + const struct xfs_scrub_meta_fns *fns;
> + bool try_harder = false;
> + int error = 0;
> +
> + trace_xfs_scrub(ip, sm, error);
> +
> + /* Forbidden if we are shut down or mounted norecovery. */
> + error = -ESHUTDOWN;
> + if (XFS_FORCED_SHUTDOWN(mp))
> + goto out;
> + error = -ENOTRECOVERABLE;
> + if (mp->m_flags & XFS_MOUNT_NORECOVERY)
> + goto out;
> +
> + /* Check our inputs. */
> + error = -EINVAL;
> + sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
> + if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
> + goto out;
> + if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
> + goto out;
> +
> + /* Do we know about this type of metadata? */
> + error = -ENOENT;
> + if (sm->sm_type > XFS_SCRUB_TYPE_MAX)
> + goto out;
> + fns = &meta_scrub_fns[sm->sm_type];
> + if (fns->scrub == NULL)
> + goto out;
> +
> + /* Does this fs even support this type of metadata? */
> + if (fns->has && !fns->has(&mp->m_sb))
> + goto out;
> +
> + /* We don't know how to repair anything yet. */
> + error = -EOPNOTSUPP;
> + if (sm->sm_flags & XFS_SCRUB_FLAG_REPAIR)
> + goto out;
> +
> + /* This isn't a stable feature. Use with care. */
> + {
> + static bool warned;
> +
> + if (!warned)
> + xfs_alert(mp,
> + "EXPERIMENTAL online scrub feature in use. Use at your own risk!");
> + warned = true;
> + }
> +
> +retry_op:
> + /* Set up for the operation. */
> + error = xfs_scrub_setup(ip, &sc, fns, sm, try_harder);
> + if (error)
> + goto out_teardown;
> +
> + /* Scrub for errors. */
> + error = fns->scrub(&sc);
> + if (!try_harder && error == -EDEADLOCK) {
> + /*
> + * Scrubbers return -EDEADLOCK to mean 'try harder'.
> + * Tear down everything we hold, then set up again with
> + * preparation for worst-case scenarios.
> + */
> + error = xfs_scrub_teardown(&sc, 0);
> + if (error)
> + goto out;
> + try_harder = true;
> + goto retry_op;
> + } else if (error)
> + goto out_teardown;
> +
> + if (xfs_scrub_found_corruption(sm))
> + xfs_alert_ratelimited(mp, "Corruption detected during scrub.");
> +
> +out_teardown:
> + error = xfs_scrub_teardown(&sc, error);
> +out:
> + trace_xfs_scrub_done(ip, sm, error);
> + return error;
> +}
> diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
> new file mode 100644
> index 0000000..4f3113a
> --- /dev/null
> +++ b/fs/xfs/scrub/common.h
> @@ -0,0 +1,179 @@
> +/*
> + * Copyright (C) 2017 Oracle. All Rights Reserved.
> + *
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write the Free Software Foundation,
> + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
> + */
> +#ifndef __XFS_REPAIR_COMMON_H__
> +#define __XFS_REPAIR_COMMON_H__
> +
> +/* Did we find something broken? */
> +static inline bool xfs_scrub_found_corruption(struct xfs_scrub_metadata *sm)
> +{
> + return sm->sm_flags & (XFS_SCRUB_FLAG_CORRUPT |
> + XFS_SCRUB_FLAG_XCORRUPT);
> +}
> +
> +struct xfs_scrub_context {
> + /* General scrub state. */
> + struct xfs_mount *mp;
> + struct xfs_scrub_metadata *sm;
> + const struct xfs_scrub_meta_fns *fns;
> + struct xfs_trans *tp;
> + struct xfs_inode *ip;
> + bool try_harder;
> +};
> +
> +/* Should we end the scrub early? */
> +static inline bool
> +xfs_scrub_should_terminate(
> + int *error)
> +{
> + if (fatal_signal_pending(current)) {
> + if (*error == 0)
> + *error = -EAGAIN;
> + return true;
> + }
> + return false;
> +}
> +
> +/*
> + * Grab a transaction. If we're going to repair something, we need to
> + * ensure there's enough reservation to make all the changes. If not,
> + * we can use an empty transaction.
> + */
> +static inline int
> +xfs_scrub_trans_alloc(
> + struct xfs_scrub_metadata *sm,
> + struct xfs_mount *mp,
> + struct xfs_trans_res *resp,
> + uint blocks,
> + uint rtextents,
> + uint flags,
> + struct xfs_trans **tpp)
> +{
> + return xfs_trans_alloc_empty(mp, tpp);
> +}
> +
> +/* Check for operational errors. */
> +bool xfs_scrub_op_ok(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
> + xfs_agblock_t bno, const char *type, int *error,
> + const char *func, int line);
> +#define XFS_SCRUB_OP_ERROR_GOTO(sc, agno, bno, type, error, label) \
> + do { \
> + if (!xfs_scrub_op_ok((sc), (agno), (bno), (type), \
> + (error), __func__, __LINE__)) \
> + goto label; \
> + } while (0)
> +
> +/* Check for operational errors for a file offset. */
> +bool xfs_scrub_file_op_ok(struct xfs_scrub_context *sc, int whichfork,
> + xfs_fileoff_t offset, const char *type,
> + int *error, const char *func, int line);
> +#define XFS_SCRUB_FILE_OP_ERROR_GOTO(sc, which, off, type, error, label) \
> + do { \
> + if (!xfs_scrub_file_op_ok((sc), (which), (off), (type), \
> + (error), __func__, __LINE__)) \
> + goto label; \
> + } while (0)
> +
> +/* Check for metadata block optimization possibilities. */
> +bool xfs_scrub_block_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp,
> + const char *type, bool fs_ok, const char *check,
> + const char *func, int line);
> +#define XFS_SCRUB_PREEN(sc, bp, type, fs_ok) \
> + xfs_scrub_block_preen((sc), (bp), (type), (fs_ok), #fs_ok, \
> + __func__, __LINE__)
> +
> +/* Check for inode metadata optimization possibilities. */
> +bool xfs_scrub_ino_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp,
> + const char *type, bool fs_ok, const char *check,
> + const char *func, int line);
> +#define XFS_SCRUB_INO_PREEN(sc, bp, type, fs_ok) \
> + xfs_scrub_ino_preen((sc), (bp), (type), (fs_ok), #fs_ok, \
> + __func__, __LINE__)
> +
> +/* Check for metadata block corruption. */
> +bool xfs_scrub_block_ok(struct xfs_scrub_context *sc, struct xfs_buf *bp,
> + const char *type, bool fs_ok, const char *check,
> + const char *func, int line);
> +#define XFS_SCRUB_CHECK(sc, bp, type, fs_ok) \
> + xfs_scrub_block_ok((sc), (bp), (type), (fs_ok), #fs_ok, \
> + __func__, __LINE__)
> +#define XFS_SCRUB_GOTO(sc, bp, type, fs_ok, label) \
> + do { \
> + if (!xfs_scrub_block_ok((sc), (bp), (type), (fs_ok), \
> + #fs_ok, __func__, __LINE__)) \
> + goto label; \
> + } while (0)
> +
> +/* Check for inode metadata corruption. */
> +bool xfs_scrub_ino_ok(struct xfs_scrub_context *sc, xfs_ino_t ino,
> + struct xfs_buf *bp, const char *type, bool fs_ok,
> + const char *check, const char *func, int line);
> +#define XFS_SCRUB_INO_CHECK(sc, ino, bp, type, fs_ok) \
> + xfs_scrub_ino_ok((sc), (ino), (bp), (type), (fs_ok), #fs_ok, \
> + __func__, __LINE__)
> +#define XFS_SCRUB_INO_GOTO(sc, ino, bp, type, fs_ok, label) \
> + do { \
> + if (!xfs_scrub_ino_ok((sc), (ino), (bp), (type), (fs_ok), \
> + #fs_ok, __func__, __LINE__)) \
> + goto label; \
> + } while (0)
> +
> +/* Check for file data block corruption. */
> +bool xfs_scrub_data_ok(struct xfs_scrub_context *sc, int whichfork,
> + xfs_fileoff_t offset, const char *type, bool fs_ok,
> + const char *check, const char *func, int line);
> +#define XFS_SCRUB_DATA_CHECK(sc, whichfork, offset, type, fs_ok) \
> + xfs_scrub_data_ok((sc), (whichfork), (offset), (type), (fs_ok), \
> + #fs_ok, __func__, __LINE__)
> +#define XFS_SCRUB_DATA_GOTO(sc, whichfork, offset, type, fs_ok, label) \
> + do { \
> + if (!xfs_scrub_data_ok((sc), (whichfork), (offset), \
> + (type), (fs_ok), #fs_ok, __func__, __LINE__)) \
> + goto label; \
> + } while (0)
> +
> +/* Check for file data block non-corruption problems. */
> +bool xfs_scrub_data_warn_ok(struct xfs_scrub_context *sc, int whichfork,
> + xfs_fileoff_t offset, const char *type, bool fs_ok,
> + const char *check, const char *func, int line);
> +#define XFS_SCRUB_DATA_WARN(sc, whichfork, offset, type, fs_ok) \
> + xfs_scrub_data_warn_ok((sc), (whichfork), (offset), (type), (fs_ok), \
> + #fs_ok, __func__, __LINE__)
> +
> +/* Signal an incomplete scrub. */
> +bool xfs_scrub_incomplete(struct xfs_scrub_context *sc, const char *type,
> + bool fs_ok, const char *check, const char *func,
> + int line);
> +#define XFS_SCRUB_INCOMPLETE(sc, type, fs_ok) \
> + xfs_scrub_incomplete((sc), (type), (fs_ok), \
> + #fs_ok, __func__, __LINE__)
> +
> +/* Setup functions */
> +
> +#define SETUP_FN(name) int name(struct xfs_scrub_context *sc, struct xfs_inode *ip)
> +SETUP_FN(xfs_scrub_setup_fs);
> +#undef SETUP_FN
> +
> +/* Metadata scrubbers */
> +
> +#define SCRUB_FN(name) int name(struct xfs_scrub_context *sc)
> +SCRUB_FN(xfs_scrub_dummy);
> +#undef SCRUB_FN
> +
> +#endif /* __XFS_REPAIR_COMMON_H__ */
> diff --git a/fs/xfs/scrub/xfs_scrub.h b/fs/xfs/scrub/xfs_scrub.h
> new file mode 100644
> index 0000000..e00e0ea
> --- /dev/null
> +++ b/fs/xfs/scrub/xfs_scrub.h
> @@ -0,0 +1,29 @@
> +/*
> + * Copyright (C) 2017 Oracle. All Rights Reserved.
> + *
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write the Free Software Foundation,
> + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
> + */
> +#ifndef __XFS_SCRUB_H__
> +#define __XFS_SCRUB_H__
> +
> +#ifndef CONFIG_XFS_ONLINE_SCRUB
> +# define xfs_scrub_metadata(ip, sm) (-ENOTTY)
> +#else
> +int xfs_scrub_metadata(struct xfs_inode *ip, struct xfs_scrub_metadata *sm);
> +#endif /* CONFIG_XFS_ONLINE_SCRUB */
> +
> +#endif /* __XFS_SCRUB_H__ */
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index cc00260..87b3874 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -44,6 +44,7 @@
> #include "xfs_btree.h"
> #include <linux/fsmap.h>
> #include "xfs_fsmap.h"
> +#include "scrub/xfs_scrub.h"
>
> #include <linux/capability.h>
> #include <linux/cred.h>
> @@ -1689,6 +1690,30 @@ xfs_ioc_getfsmap(
> return 0;
> }
>
> +STATIC int
> +xfs_ioc_scrub_metadata(
> + struct xfs_inode *ip,
> + void __user *arg)
> +{
> + struct xfs_scrub_metadata scrub;
> + int error;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + if (copy_from_user(&scrub, arg, sizeof(scrub)))
> + return -EFAULT;
> +
> + error = xfs_scrub_metadata(ip, &scrub);
> + if (error)
> + return error;
> +
> + if (copy_to_user(arg, &scrub, sizeof(scrub)))
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> int
> xfs_ioc_swapext(
> xfs_swapext_t *sxp)
> @@ -1872,6 +1897,9 @@ xfs_file_ioctl(
> case FS_IOC_GETFSMAP:
> return xfs_ioc_getfsmap(ip, arg);
>
> + case XFS_IOC_SCRUB_METADATA:
> + return xfs_ioc_scrub_metadata(ip, arg);
> +
> case XFS_IOC_FD_TO_HANDLE:
> case XFS_IOC_PATH_TO_HANDLE:
> case XFS_IOC_PATH_TO_FSHANDLE: {
> diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
> index e8b4de3..972d4bd 100644
> --- a/fs/xfs/xfs_ioctl32.c
> +++ b/fs/xfs/xfs_ioctl32.c
> @@ -557,6 +557,7 @@ xfs_file_compat_ioctl(
> case XFS_IOC_ERROR_CLEARALL:
> case FS_IOC_GETFSMAP:
> case XFS_IOC_GET_AG_RESBLKS:
> + case XFS_IOC_SCRUB_METADATA:
> return xfs_file_ioctl(filp, cmd, p);
> #ifndef BROKEN_X86_ALIGNMENT
> /* These are handled fine if no alignment issues */
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 2e7e193..d4de29b 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -3312,7 +3312,7 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
>
> /* scrub */
> #define XFS_SCRUB_TYPE_DESC \
> - { 0, NULL }
> + { XFS_SCRUB_TYPE_TEST, "dummy" }
> DECLARE_EVENT_CLASS(xfs_scrub_class,
> TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
> int error),
> @@ -3330,6 +3330,11 @@ DECLARE_EVENT_CLASS(xfs_scrub_class,
> TP_fast_assign(
> __entry->dev = ip->i_mount->m_super->s_dev;
> __entry->ino = ip->i_ino;
> + __entry->type = sm->sm_type;
> + __entry->agno = sm->sm_agno;
> + __entry->inum = sm->sm_ino;
> + __entry->gen = sm->sm_gen;
> + __entry->flags = sm->sm_flags;
> __entry->error = error;
> ),
> TP_printk("dev %d:%d ino %llu type %s agno %u inum %llu gen %u flags 0x%x error %d",
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
next prev parent reply other threads:[~2017-07-23 16:37 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-07-21 4:38 [PATCH v8 00/22] xfs: online scrub support Darrick J. Wong
2017-07-21 4:38 ` [PATCH 01/22] xfs: query the per-AG reservation counters Darrick J. Wong
2017-07-23 16:16 ` Allison Henderson
2017-07-23 22:25 ` Dave Chinner
2017-07-24 19:07 ` Darrick J. Wong
2017-07-21 4:38 ` [PATCH 02/22] xfs: add scrub tracepoints Darrick J. Wong
2017-07-23 16:23 ` Allison Henderson
2017-07-21 4:38 ` [PATCH 03/22] xfs: create an ioctl to scrub AG metadata Darrick J. Wong
2017-07-23 16:37 ` Allison Henderson [this message]
2017-07-23 23:45 ` Dave Chinner
2017-07-24 21:14 ` Darrick J. Wong
2017-07-21 4:38 ` [PATCH 04/22] xfs: generic functions to scrub metadata and btrees Darrick J. Wong
2017-07-23 16:40 ` Allison Henderson
2017-07-24 1:05 ` Dave Chinner
2017-07-24 21:58 ` Darrick J. Wong
2017-07-24 23:15 ` Dave Chinner
2017-07-25 0:39 ` Darrick J. Wong
2017-07-21 4:39 ` [PATCH 05/22] xfs: scrub in-memory metadata buffers Darrick J. Wong
2017-07-23 16:48 ` Allison Henderson
2017-07-24 1:43 ` Dave Chinner
2017-07-24 22:36 ` Darrick J. Wong
2017-07-24 23:38 ` Dave Chinner
2017-07-25 0:14 ` Darrick J. Wong
2017-07-25 3:32 ` Dave Chinner
2017-07-25 5:27 ` Darrick J. Wong
2017-07-21 4:39 ` [PATCH 06/22] xfs: scrub the backup superblocks Darrick J. Wong
2017-07-23 16:50 ` Allison Henderson
2017-07-25 4:05 ` Dave Chinner
2017-07-25 5:42 ` Darrick J. Wong
2017-07-21 4:39 ` [PATCH 07/22] xfs: scrub AGF and AGFL Darrick J. Wong
2017-07-23 16:59 ` Allison Henderson
2017-07-21 4:39 ` [PATCH 08/22] xfs: scrub the AGI Darrick J. Wong
2017-07-23 17:02 ` Allison Henderson
2017-07-21 4:39 ` [PATCH 09/22] xfs: scrub free space btrees Darrick J. Wong
2017-07-23 17:09 ` Allison Henderson
2017-07-21 4:39 ` [PATCH 10/22] xfs: scrub inode btrees Darrick J. Wong
2017-07-23 17:15 ` Allison Henderson
2017-07-21 4:39 ` [PATCH 11/22] xfs: scrub rmap btrees Darrick J. Wong
2017-07-23 17:21 ` Allison Henderson
2017-07-21 4:39 ` [PATCH 12/22] xfs: scrub refcount btrees Darrick J. Wong
2017-07-23 17:25 ` Allison Henderson
2017-07-21 4:39 ` [PATCH 13/22] xfs: scrub inodes Darrick J. Wong
2017-07-23 17:38 ` Allison Henderson
2017-07-24 20:02 ` Darrick J. Wong
2017-07-21 4:40 ` [PATCH 14/22] xfs: scrub inode block mappings Darrick J. Wong
2017-07-23 17:41 ` Allison Henderson
2017-07-24 20:05 ` Darrick J. Wong
2017-07-21 4:40 ` [PATCH 15/22] xfs: scrub directory/attribute btrees Darrick J. Wong
2017-07-23 17:45 ` Allison Henderson
2017-07-21 4:40 ` [PATCH 16/22] xfs: scrub directory metadata Darrick J. Wong
2017-07-23 17:51 ` Allison Henderson
2017-07-21 4:40 ` [PATCH 17/22] xfs: scrub directory freespace Darrick J. Wong
2017-07-23 17:55 ` Allison Henderson
2017-07-21 4:40 ` [PATCH 18/22] xfs: scrub extended attributes Darrick J. Wong
2017-07-23 17:57 ` Allison Henderson
2017-07-21 4:40 ` [PATCH 19/22] xfs: scrub symbolic links Darrick J. Wong
2017-07-23 17:59 ` Allison Henderson
2017-07-21 4:40 ` [PATCH 20/22] xfs: scrub parent pointers Darrick J. Wong
2017-07-23 18:03 ` Allison Henderson
2017-07-21 4:40 ` [PATCH 21/22] xfs: scrub realtime bitmap/summary Darrick J. Wong
2017-07-23 18:05 ` Allison Henderson
2017-07-21 4:40 ` [PATCH 22/22] xfs: scrub quota information Darrick J. Wong
2017-07-23 18:07 ` Allison Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=41bd3e4e-1bc0-b5cc-1f8e-6ac9ca7f3b08@oracle.com \
--to=allison.henderson@oracle.com \
--cc=darrick.wong@oracle.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).