From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 09/19] xfs: add helper routines for the repair code
Date: Fri, 25 Aug 2017 15:17:52 -0700 [thread overview]
Message-ID: <150369947182.9957.11780536280009861815.stgit@magnolia> (raw)
In-Reply-To: <150369940879.9957.6303798184036268321.stgit@magnolia>
From: Darrick J. Wong <darrick.wong@oracle.com>
Add some helper functions for repair functions that will help us to
allocate and initialize new metadata blocks for btrees that we're
rebuilding.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/scrub/common.c | 4
fs/xfs/scrub/common.h | 3
fs/xfs/scrub/repair.c | 911 +++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/scrub/repair.h | 74 ++++
fs/xfs/scrub/scrub.c | 3
fs/xfs/scrub/scrub.h | 9
7 files changed, 1004 insertions(+), 1 deletion(-)
create mode 100644 fs/xfs/scrub/repair.c
create mode 100644 fs/xfs/scrub/repair.h
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index f8b3915..2dc82d5 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -156,6 +156,7 @@ xfs-y += $(addprefix scrub/, \
inode.o \
parent.o \
refcount.o \
+ repair.o \
rmap.o \
scrub.o \
symlink.o \
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index db32c31..515bee6 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -47,6 +47,7 @@
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/btree.h"
+#include "scrub/repair.h"
/* Common code for the metadata scrubbers. */
@@ -699,7 +700,8 @@ xfs_scrub_setup_fs(
struct xfs_inode *ip)
{
return xfs_scrub_trans_alloc(sc->sm, sc->mp,
- &M_RES(sc->mp)->tr_itruncate, 0, 0, 0, &sc->tp);
+ &M_RES(sc->mp)->tr_itruncate,
+ xfs_repair_calc_ag_resblks(sc), 0, 0, &sc->tp);
}
/* Set us up with AG headers and btree cursors. */
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index e20fb1d..6db9cfe 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -48,6 +48,9 @@ xfs_scrub_trans_alloc(
uint flags,
struct xfs_trans **tpp)
{
+ if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+ return xfs_trans_alloc(mp, resp, blocks, rtextents, flags, tpp);
+
return xfs_trans_alloc_empty(mp, tpp);
}
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
new file mode 100644
index 0000000..9df2f97
--- /dev/null
+++ b/fs/xfs/scrub/repair.c
@@ -0,0 +1,911 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_extent_busy.h"
+#include "xfs_ag_resv.h"
+#include "xfs_trans_space.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+
+/*
+ * Roll a transaction, keeping the AG headers locked and reinitializing
+ * the btree cursors.
+ */
+int
+xfs_repair_roll_ag_trans(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_trans *tp;
+ int error;
+
+ /* Keep the AG header buffers locked so we can keep going. */
+ xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
+ xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
+ xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
+
+ /* Roll the transaction. */
+ tp = sc->tp;
+ error = xfs_trans_roll(&sc->tp, NULL);
+ if (error)
+ return error;
+
+ /* Join the buffer to the new transaction or release the hold. */
+ if (sc->tp != tp) {
+ xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
+ xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
+ xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
+ } else {
+ xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
+ xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
+ xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
+ }
+
+ return error;
+}
+
+/*
+ * Does the given AG have enough space to rebuild a btree? Neither AG
+ * reservation can be critical, and we must have enough space (factoring
+ * in AG reservations) to construct a whole btree.
+ */
+bool
+xfs_repair_ag_has_space(
+ struct xfs_perag *pag,
+ xfs_extlen_t nr_blocks,
+ enum xfs_ag_resv_type type)
+{
+ return !xfs_ag_resv_critical(pag, XFS_AG_RESV_AGFL) &&
+ !xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA) &&
+ pag->pagf_freeblks > xfs_ag_resv_needed(pag, type) + nr_blocks;
+}
+
+/* Allocate a block in an AG. */
+int
+xfs_repair_alloc_ag_block(
+ struct xfs_scrub_context *sc,
+ struct xfs_owner_info *oinfo,
+ xfs_fsblock_t *fsbno,
+ enum xfs_ag_resv_type resv)
+{
+ struct xfs_alloc_arg args = {0};
+ xfs_agblock_t bno;
+ int error;
+
+ if (resv == XFS_AG_RESV_AGFL) {
+ error = xfs_alloc_get_freelist(sc->tp, sc->sa.agf_bp, &bno, 1);
+ if (error)
+ return error;
+ if (bno == NULLAGBLOCK)
+ return -ENOSPC;
+ xfs_extent_busy_reuse(sc->mp, sc->sa.agno, bno,
+ 1, false);
+ *fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, bno);
+ return 0;
+ }
+
+ args.tp = sc->tp;
+ args.mp = sc->mp;
+ args.oinfo = *oinfo;
+ args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.agno, 0);
+ args.minlen = 1;
+ args.maxlen = 1;
+ args.prod = 1;
+ args.type = XFS_ALLOCTYPE_NEAR_BNO;
+ args.resv = resv;
+
+ error = xfs_alloc_vextent(&args);
+ if (error)
+ return error;
+ if (args.fsbno == NULLFSBLOCK)
+ return -ENOSPC;
+ ASSERT(args.len == 1);
+ *fsbno = args.fsbno;
+
+ return 0;
+}
+
+/* Initialize an AG block to a zeroed out btree header. */
+int
+xfs_repair_init_btblock(
+ struct xfs_scrub_context *sc,
+ xfs_fsblock_t fsb,
+ struct xfs_buf **bpp,
+ xfs_btnum_t btnum,
+ const struct xfs_buf_ops *ops)
+{
+ struct xfs_trans *tp = sc->tp;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_buf *bp;
+
+ trace_xfs_repair_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
+ XFS_FSB_TO_AGBNO(mp, fsb), btnum);
+
+ ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno);
+ bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
+ XFS_FSB_TO_BB(mp, 1), 0);
+ xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
+ xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno,
+ XFS_BTREE_CRC_BLOCKS);
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
+ xfs_trans_log_buf(tp, bp, 0, bp->b_length);
+ bp->b_ops = ops;
+ *bpp = bp;
+
+ return 0;
+}
+
+/* Ensure the freelist is full. */
+int
+xfs_repair_fix_freelist(
+ struct xfs_scrub_context *sc,
+ bool can_shrink)
+{
+ struct xfs_alloc_arg args = {0};
+ int error;
+
+ args.mp = sc->mp;
+ args.tp = sc->tp;
+ args.agno = sc->sa.agno;
+ args.alignment = 1;
+ args.pag = xfs_perag_get(args.mp, sc->sa.agno);
+ args.resv = XFS_AG_RESV_AGFL;
+
+ error = xfs_alloc_fix_freelist(&args,
+ can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK);
+ xfs_perag_put(args.pag);
+
+ return error;
+}
+
+/* Put a block back on the AGFL. */
+int
+xfs_repair_put_freelist(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno)
+{
+ struct xfs_owner_info oinfo;
+ int error;
+
+ /*
+ * Since we're "freeing" a lost block onto the AGFL, we have to
+ * create an rmap for the block prior to merging it or else other
+ * parts will break.
+ */
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+ error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno, agbno, 1,
+ &oinfo);
+ if (error)
+ return error;
+
+ /* Put the block on the AGFL. */
+ error = xfs_alloc_put_freelist(sc->tp, sc->sa.agf_bp, sc->sa.agfl_bp,
+ agbno, 0);
+ if (error)
+ return error;
+ xfs_extent_busy_insert(sc->tp, sc->sa.agno, agbno, 1,
+ XFS_EXTENT_BUSY_SKIP_DISCARD);
+
+ /* Make sure the AGFL doesn't overfill. */
+ return xfs_repair_fix_freelist(sc, true);
+}
+
+/*
+ * For a given metadata extent and owner, delete the associated rmap.
+ * If the block has no other owners, free it.
+ */
+STATIC int
+xfs_repair_free_or_unmap_extent(
+ struct xfs_scrub_context *sc,
+ xfs_fsblock_t fsbno,
+ xfs_extlen_t len,
+ struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type resv)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_btree_cur *rmap_cur;
+ struct xfs_buf *agf_bp = NULL;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ bool has_other_rmap;
+ int error = 0;
+
+ ASSERT(xfs_sb_version_hasrmapbt(&mp->m_sb));
+ agno = XFS_FSB_TO_AGNO(mp, fsbno);
+ agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
+
+ trace_xfs_repair_free_or_unmap_extent(mp, agno, agbno, len);
+
+ for (; len > 0 && !error; len--, agbno++, fsbno++) {
+ ASSERT(sc->ip != NULL || agno == sc->sa.agno);
+
+ /* Can we find any other rmappings? */
+ if (sc->ip) {
+ error = xfs_alloc_read_agf(mp, sc->tp, agno, 0,
+ &agf_bp);
+ if (error)
+ break;
+ if (!agf_bp) {
+ error = -ENOMEM;
+ break;
+ }
+ }
+ rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp,
+ agf_bp ? agf_bp : sc->sa.agf_bp, agno);
+ error = xfs_rmap_has_other_keys(rmap_cur, agbno, 1, oinfo,
+ &has_other_rmap);
+ if (error)
+ goto out_cur;
+ xfs_btree_del_cursor(rmap_cur, XFS_BTREE_NOERROR);
+ if (agf_bp)
+ xfs_trans_brelse(sc->tp, agf_bp);
+
+ /*
+ * If there are other rmappings, this block is cross
+ * linked and must not be freed. Remove the reverse
+ * mapping and move on. Otherwise, we were the only
+ * owner of the block, so free the extent, which will
+ * also remove the rmap.
+ */
+ if (has_other_rmap)
+ error = xfs_rmap_free(sc->tp, agf_bp, agno, agbno, 1,
+ oinfo);
+ else if (resv == XFS_AG_RESV_AGFL)
+ error = xfs_repair_put_freelist(sc, agbno);
+ else
+ error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv);
+ if (error)
+ break;
+
+ if (sc->ip)
+ error = xfs_trans_roll(&sc->tp, sc->ip);
+ else
+ error = xfs_repair_roll_ag_trans(sc);
+ }
+
+ return error;
+out_cur:
+ xfs_btree_del_cursor(rmap_cur, XFS_BTREE_ERROR);
+ if (agf_bp)
+ xfs_trans_brelse(sc->tp, agf_bp);
+ return error;
+}
+
+/* Collect a dead btree extent for later disposal. */
+int
+xfs_repair_collect_btree_extent(
+ struct xfs_scrub_context *sc,
+ struct list_head *btlist,
+ xfs_fsblock_t fsbno,
+ xfs_extlen_t len)
+{
+ struct xfs_repair_btree_extent *rbe;
+
+ trace_xfs_repair_collect_btree_extent(sc->mp,
+ XFS_FSB_TO_AGNO(mp, fsbno),
+ XFS_FSB_TO_AGBNO(mp, fsbno), len);
+
+ rbe = kmem_alloc(sizeof(struct xfs_repair_btree_extent),
+ KM_MAYFAIL | KM_NOFS);
+ if (!rbe)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&rbe->list);
+ rbe->fsbno = fsbno;
+ rbe->len = len;
+ list_add_tail(&rbe->list, btlist);
+
+ return 0;
+}
+
+/* Invalidate buffers for blocks we're dumping. */
+int
+xfs_repair_invalidate_blocks(
+ struct xfs_scrub_context *sc,
+ struct list_head *btlist)
+{
+ struct xfs_repair_btree_extent *rbe;
+ struct xfs_repair_btree_extent *n;
+ struct xfs_buf *bp;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ xfs_agblock_t i;
+
+ list_for_each_entry_safe(rbe, n, btlist, list) {
+ agno = XFS_FSB_TO_AGNO(sc->mp, rbe->fsbno);
+ agbno = XFS_FSB_TO_AGBNO(sc->mp, rbe->fsbno);
+ for (i = 0; i < rbe->len; i++) {
+ bp = xfs_btree_get_bufs(sc->mp, sc->tp, agno,
+ agbno + i, 0);
+ xfs_trans_binval(sc->tp, bp);
+ }
+ }
+
+ return 0;
+}
+
+/* Dispose of dead btree extents. If oinfo is NULL, just delete the list. */
+int
+xfs_repair_reap_btree_extents(
+ struct xfs_scrub_context *sc,
+ struct list_head *btlist,
+ struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type)
+{
+ struct xfs_repair_btree_extent *rbe;
+ struct xfs_repair_btree_extent *n;
+ int error = 0;
+
+ list_for_each_entry_safe(rbe, n, btlist, list) {
+ if (oinfo) {
+ error = xfs_repair_free_or_unmap_extent(sc, rbe->fsbno,
+ rbe->len, oinfo, type);
+ if (error)
+ oinfo = NULL;
+ }
+ list_del(&rbe->list);
+ kmem_free(rbe);
+ }
+
+ return error;
+}
+
+/* Errors happened, just delete the dead btree extent list. */
+void
+xfs_repair_cancel_btree_extents(
+ struct xfs_scrub_context *sc,
+ struct list_head *btlist)
+{
+ xfs_repair_reap_btree_extents(sc, btlist, NULL, XFS_AG_RESV_NONE);
+}
+
+/* Compare two btree extents. */
+static int
+xfs_repair_btree_extent_cmp(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
+{
+ struct xfs_repair_btree_extent *ap;
+ struct xfs_repair_btree_extent *bp;
+
+ ap = container_of(a, struct xfs_repair_btree_extent, list);
+ bp = container_of(b, struct xfs_repair_btree_extent, list);
+
+ if (ap->fsbno > bp->fsbno)
+ return 1;
+ else if (ap->fsbno < bp->fsbno)
+ return -1;
+ return 0;
+}
+
+/*
+ * Process a block on the extent list. This involves advancing along the
+ * subtract list until it catches fsb, then deciding if fsb is to be
+ * preserved on the list or removed from it, and doing all the list
+ * bookkeeping as necessary.
+ */
+STATIC int
+xfs_repair_subtract_extents_block(
+ struct xfs_repair_btree_extent *sub,
+ struct xfs_repair_btree_extent **subp,
+ struct list_head *sublist,
+ struct xfs_repair_btree_extent **rbe,
+ xfs_fsblock_t *newfsb,
+ xfs_fsblock_t fsb,
+ xfs_extlen_t *newlen)
+{
+ struct xfs_repair_btree_extent *newrbe;
+
+ /*
+ * If the current location of the extent list is beyond the
+ * subtract list, move the subtract list forward by one block or
+ * by one record.
+ */
+ while (fsb > sub->fsbno || sub->len == 0) {
+ if (sub->len) {
+ sub->len--;
+ sub->fsbno++;
+ } else {
+ /*
+ * Get the next subtract extent. If there isn't
+ * one, make the current extent match the
+ * unprocessed part of that extent, and jump
+ * out.
+ */
+ if ((*subp)->list.next == sublist ||
+ (*subp)->list.next == NULL) {
+ (*rbe)->len -= fsb - (*rbe)->fsbno;
+ (*rbe)->fsbno = fsb;
+ *subp = NULL;
+ *rbe = NULL;
+ return 0;
+ }
+ *subp = list_next_entry(*subp, list);
+ memcpy(sub, *subp, sizeof(*sub));
+ }
+ }
+
+ if (fsb != sub->fsbno) {
+ /*
+ * Block not in the subtract list; stash it for later
+ * reinsertion in the list.
+ */
+ if (*newfsb == NULLFSBLOCK) {
+ *newfsb = fsb;
+ *newlen = 1;
+ } else
+ (*newlen)++;
+ } else {
+ /* Match! */
+ if (*newfsb != NULLFSBLOCK) {
+ /*
+ * Last block of the extent and we have a saved
+ * extent. Store the saved extent in this
+ * extent.
+ */
+ if (fsb == (*rbe)->fsbno + (*rbe)->len - 1) {
+ (*rbe)->fsbno = *newfsb;
+ (*rbe)->len = *newlen;
+ *newfsb = NULLFSBLOCK;
+ *rbe = NULL;
+ return 0;
+ }
+ /* Stash the new extent in the list. */
+ newrbe = kmem_alloc(
+ sizeof(struct xfs_repair_btree_extent),
+ KM_MAYFAIL | KM_NOFS);
+ if (!newrbe)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&newrbe->list);
+ newrbe->fsbno = *newfsb;
+ newrbe->len = *newlen;
+ list_add_tail(&newrbe->list,
+ &(*rbe)->list);
+ }
+
+ *newfsb = NULLFSBLOCK;
+ *newlen = 0;
+ }
+
+ return 0;
+}
+
+/* Remove all the blocks in sublist from exlist. */
+int
+xfs_repair_subtract_extents(
+ struct xfs_scrub_context *sc,
+ struct list_head *exlist,
+ struct list_head *sublist)
+{
+ struct xfs_repair_btree_extent *newrbe;
+ struct xfs_repair_btree_extent *rbe;
+ struct xfs_repair_btree_extent *n;
+ struct xfs_repair_btree_extent *subp;
+ struct xfs_repair_btree_extent sub;
+ xfs_fsblock_t fsb;
+ xfs_fsblock_t newfsb;
+ xfs_extlen_t newlen;
+ int error;
+
+ list_sort(NULL, exlist, xfs_repair_btree_extent_cmp);
+ list_sort(NULL, sublist, xfs_repair_btree_extent_cmp);
+
+ subp = list_first_entry(sublist, struct xfs_repair_btree_extent, list);
+ if (subp == NULL)
+ return 0;
+
+ memcpy(&sub, subp, sizeof(sub));
+ /* For every block mentioned in exlist... */
+ list_for_each_entry_safe(rbe, n, exlist, list) {
+ newfsb = NULLFSBLOCK;
+ newlen = 0;
+ for (fsb = rbe->fsbno; fsb < rbe->fsbno + rbe->len; fsb++) {
+ error = xfs_repair_subtract_extents_block(&sub, &subp,
+ sublist, &rbe, &newfsb, fsb, &newlen);
+ if (error)
+ return error;
+ }
+
+ /* If we have an extent to add back, do that now. */
+ if (newfsb != NULLFSBLOCK) {
+ if (rbe) {
+ newrbe = rbe;
+ rbe = NULL;
+ } else {
+ newrbe = kmem_alloc(
+ sizeof(struct xfs_repair_btree_extent),
+ KM_MAYFAIL | KM_NOFS);
+ if (!newrbe)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&newrbe->list);
+ list_add_tail(&newrbe->list, &rbe->list);
+ }
+ newrbe->fsbno = newfsb;
+ newrbe->len = newlen;
+ }
+ if (rbe) {
+ list_del(&rbe->list);
+ kmem_free(rbe);
+ }
+ if (subp == NULL)
+ break;
+ }
+
+ return 0;
+}
+
+struct xfs_repair_find_ag_btree_roots_info {
+ struct xfs_buf *agfl_bp;
+ struct xfs_repair_find_ag_btree *btree_info;
+};
+
+/* Is this an OWN_AG block in the AGFL? */
+STATIC bool
+xfs_repair_is_block_in_agfl(
+ struct xfs_mount *mp,
+ uint64_t rmap_owner,
+ xfs_agblock_t agbno,
+ struct xfs_buf *agf_bp,
+ struct xfs_buf *agfl_bp)
+{
+ struct xfs_agf *agf;
+ __be32 *agfl_bno;
+ unsigned int flfirst;
+ unsigned int fllast;
+ int i;
+
+ if (rmap_owner != XFS_RMAP_OWN_AG)
+ return false;
+
+ agf = XFS_BUF_TO_AGF(agf_bp);
+ agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agfl_bp);
+ flfirst = be32_to_cpu(agf->agf_flfirst);
+ fllast = be32_to_cpu(agf->agf_fllast);
+
+ /* Skip an empty AGFL. */
+ if (agf->agf_flcount == cpu_to_be32(0))
+ return false;
+
+ /* first to last is a consecutive list. */
+ if (fllast >= flfirst) {
+ for (i = flfirst; i <= fllast; i++) {
+ if (be32_to_cpu(agfl_bno[i]) == agbno)
+ return true;
+ }
+
+ return false;
+ }
+
+ /* first to the end */
+ for (i = flfirst; i < XFS_AGFL_SIZE(mp); i++) {
+ if (be32_to_cpu(agfl_bno[i]) == agbno)
+ return true;
+ }
+
+ /* the start to last. */
+ for (i = 0; i <= fllast; i++) {
+ if (be32_to_cpu(agfl_bno[i]) == agbno)
+ return true;
+ }
+
+ return false;
+}
+
+/* Find btree roots from the AGF. */
+STATIC int
+xfs_repair_find_ag_btree_roots_helper(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_repair_find_ag_btree_roots_info *ri = priv;
+ struct xfs_repair_find_ag_btree *fab;
+ struct xfs_buf *bp;
+ struct xfs_btree_block *btblock;
+ xfs_daddr_t daddr;
+ xfs_agblock_t agbno;
+ int error = 0;
+
+ if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner))
+ return 0;
+
+ for (agbno = 0; agbno < rec->rm_blockcount; agbno++) {
+ daddr = XFS_AGB_TO_DADDR(mp, cur->bc_private.a.agno,
+ rec->rm_startblock + agbno);
+ for (fab = ri->btree_info; fab->buf_ops; fab++) {
+ if (rec->rm_owner != fab->rmap_owner)
+ continue;
+
+ /*
+ * Blocks in the AGFL have stale contents that
+ * might just happen to have a matching magic
+ * and uuid. We don't want to pull these blocks
+ * in as part of a tree root, so we have to
+ * filter out the AGFL stuff here. If the AGFL
+ * looks insane we'll just refuse to repair.
+ */
+ if (xfs_repair_is_block_in_agfl(mp, rec->rm_owner,
+ rec->rm_startblock + agbno,
+ cur->bc_private.a.agbp, ri->agfl_bp))
+ continue;
+
+ error = xfs_trans_read_buf(mp, cur->bc_tp,
+ mp->m_ddev_targp, daddr, mp->m_bsize,
+ 0, &bp, NULL);
+ if (error)
+ return error;
+
+ /* Does this look like a block we want? */
+ btblock = XFS_BUF_TO_BLOCK(bp);
+ if (be32_to_cpu(btblock->bb_magic) != fab->magic)
+ goto next_fab;
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !uuid_equal(&btblock->bb_u.s.bb_uuid,
+ &mp->m_sb.sb_meta_uuid))
+ goto next_fab;
+ if (fab->root != NULLAGBLOCK &&
+ xfs_btree_get_level(btblock) <= fab->level)
+ goto next_fab;
+
+ /* Make sure we pass the verifiers. */
+ bp->b_ops = fab->buf_ops;
+ bp->b_ops->verify_read(bp);
+ if (bp->b_error)
+ goto next_fab;
+ fab->root = rec->rm_startblock + agbno;
+ fab->level = xfs_btree_get_level(btblock);
+
+ trace_xfs_repair_find_ag_btree_roots_helper(mp,
+ cur->bc_private.a.agno,
+ rec->rm_startblock + agbno,
+ be32_to_cpu(btblock->bb_magic),
+ fab->level);
+next_fab:
+ xfs_trans_brelse(cur->bc_tp, bp);
+ if (be32_to_cpu(btblock->bb_magic) == fab->magic)
+ break;
+ }
+ }
+
+ return error;
+}
+
+/* Find the roots of the given btrees from the rmap info. */
+int
+xfs_repair_find_ag_btree_roots(
+ struct xfs_scrub_context *sc,
+ struct xfs_buf *agf_bp,
+ struct xfs_repair_find_ag_btree *btree_info,
+ struct xfs_buf *agfl_bp)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_repair_find_ag_btree_roots_info ri;
+ struct xfs_repair_find_ag_btree *fab;
+ struct xfs_btree_cur *cur;
+ int error;
+
+ ri.btree_info = btree_info;
+ ri.agfl_bp = agfl_bp;
+ for (fab = btree_info; fab->buf_ops; fab++) {
+ ASSERT(agfl_bp || fab->rmap_owner != XFS_RMAP_OWN_AG);
+ fab->root = NULLAGBLOCK;
+ fab->level = 0;
+ }
+
+ cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
+ error = xfs_rmap_query_all(cur, xfs_repair_find_ag_btree_roots_helper,
+ &ri);
+ xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+
+ for (fab = btree_info; !error && fab->buf_ops; fab++)
+ if (fab->root != NULLAGBLOCK)
+ fab->level++;
+
+ return error;
+}
+
+/* Reset the superblock counters from the AGF/AGI. */
+int
+xfs_repair_reset_counters(
+ struct xfs_mount *mp)
+{
+ struct xfs_trans *tp;
+ struct xfs_buf *agi_bp;
+ struct xfs_buf *agf_bp;
+ struct xfs_agi *agi;
+ struct xfs_agf *agf;
+ xfs_agnumber_t agno;
+ xfs_ino_t icount = 0;
+ xfs_ino_t ifree = 0;
+ xfs_filblks_t fdblocks = 0;
+ int64_t delta_icount;
+ int64_t delta_ifree;
+ int64_t delta_fdblocks;
+ int error;
+
+ trace_xfs_repair_reset_counters(mp);
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ return error;
+
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+ /* Count all the inodes... */
+ error = xfs_ialloc_read_agi(mp, tp, agno, &agi_bp);
+ if (error)
+ goto out;
+ agi = XFS_BUF_TO_AGI(agi_bp);
+ icount += be32_to_cpu(agi->agi_count);
+ ifree += be32_to_cpu(agi->agi_freecount);
+
+ /* Add up the free/freelist/bnobt/cntbt blocks... */
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agf_bp);
+ if (error)
+ goto out;
+ if (!agf_bp) {
+ error = -ENOMEM;
+ goto out;
+ }
+ agf = XFS_BUF_TO_AGF(agf_bp);
+ fdblocks += be32_to_cpu(agf->agf_freeblks);
+ fdblocks += be32_to_cpu(agf->agf_flcount);
+ fdblocks += be32_to_cpu(agf->agf_btreeblks);
+ }
+
+ /*
+ * Reinitialize the counters. The on-disk and in-core counters
+ * differ by the number of inodes/blocks reserved by the admin,
+ * the per-AG reservation, and any transactions in progress, so
+ * we have to account for that.
+ */
+ spin_lock(&mp->m_sb_lock);
+ delta_icount = (int64_t)mp->m_sb.sb_icount - icount;
+ delta_ifree = (int64_t)mp->m_sb.sb_ifree - ifree;
+ delta_fdblocks = (int64_t)mp->m_sb.sb_fdblocks - fdblocks;
+ mp->m_sb.sb_icount = icount;
+ mp->m_sb.sb_ifree = ifree;
+ mp->m_sb.sb_fdblocks = fdblocks;
+ spin_unlock(&mp->m_sb_lock);
+
+ if (delta_icount) {
+ error = xfs_mod_icount(mp, delta_icount);
+ if (error)
+ goto out;
+ }
+ if (delta_ifree) {
+ error = xfs_mod_ifree(mp, delta_ifree);
+ if (error)
+ goto out;
+ }
+ if (delta_fdblocks) {
+ error = xfs_mod_fdblocks(mp, delta_fdblocks, false);
+ if (error)
+ goto out;
+ }
+
+out:
+ xfs_trans_cancel(tp);
+ return error;
+}
+
+/* Figure out how many blocks to reserve for an AG repair. */
+xfs_extlen_t
+xfs_repair_calc_ag_resblks(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_scrub_metadata *sm = sc->sm;
+ struct xfs_agi *agi;
+ struct xfs_agf *agf;
+ struct xfs_buf *bp;
+ xfs_agino_t icount;
+ xfs_extlen_t aglen;
+ xfs_extlen_t usedlen;
+ xfs_extlen_t freelen;
+ xfs_extlen_t bnobt_sz;
+ xfs_extlen_t inobt_sz;
+ xfs_extlen_t rmapbt_sz;
+ xfs_extlen_t refcbt_sz;
+ int error;
+
+ if (!(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
+ return 0;
+
+ /*
+ * Try to get the actual counters from disk; if not, make
+ * some worst case assumptions.
+ */
+ error = xfs_read_agi(mp, NULL, sm->sm_agno, &bp);
+ if (!error) {
+ agi = XFS_BUF_TO_AGI(bp);
+ icount = be32_to_cpu(agi->agi_count);
+ xfs_trans_brelse(NULL, bp);
+ } else
+ icount = mp->m_sb.sb_agblocks / mp->m_sb.sb_inopblock;
+
+ error = xfs_alloc_read_agf(mp, NULL, sm->sm_agno, 0, &bp);
+ if (!error && bp) {
+ agf = XFS_BUF_TO_AGF(bp);
+ aglen = be32_to_cpu(agf->agf_length);
+ freelen = be32_to_cpu(agf->agf_freeblks);
+ usedlen = aglen - freelen;
+ xfs_trans_brelse(NULL, bp);
+ } else {
+ aglen = mp->m_sb.sb_agblocks;
+ freelen = aglen;
+ usedlen = aglen;
+ }
+
+ trace_xfs_repair_calc_ag_resblks(mp, sm->sm_agno, icount, aglen,
+ freelen, usedlen);
+
+ /*
+ * Figure out how many blocks we'd need worst case to rebuild
+ * each type of btree. Note that we can only rebuild the
+ * bnobt/cntbt or inobt/finobt as pairs.
+ */
+ bnobt_sz = 2 * xfs_allocbt_calc_size(mp, freelen);
+ if (xfs_sb_version_hassparseinodes(&mp->m_sb))
+ inobt_sz = xfs_iallocbt_calc_size(mp, icount /
+ XFS_INODES_PER_HOLEMASK_BIT);
+ else
+ inobt_sz = xfs_iallocbt_calc_size(mp, icount /
+ XFS_INODES_PER_CHUNK);
+ if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ inobt_sz *= 2;
+ if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+ rmapbt_sz = xfs_rmapbt_calc_size(mp, aglen);
+ refcbt_sz = xfs_refcountbt_calc_size(mp, usedlen);
+ } else {
+ rmapbt_sz = xfs_rmapbt_calc_size(mp, usedlen);
+ refcbt_sz = 0;
+ }
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ rmapbt_sz = 0;
+
+ trace_xfs_repair_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz,
+ inobt_sz, rmapbt_sz, refcbt_sz);
+
+ return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
new file mode 100644
index 0000000..e0d3690
--- /dev/null
+++ b/fs/xfs/scrub/repair.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_SCRUB_REPAIR_H__
+#define __XFS_SCRUB_REPAIR_H__
+
+/* Repair helpers */
+
+struct xfs_repair_find_ag_btree {
+ uint64_t rmap_owner;
+ const struct xfs_buf_ops *buf_ops;
+ uint32_t magic;
+ xfs_agblock_t root;
+ unsigned int level;
+};
+
+struct xfs_repair_btree_extent {
+ struct list_head list;
+ xfs_fsblock_t fsbno;
+ xfs_extlen_t len;
+};
+
+int xfs_repair_roll_ag_trans(struct xfs_scrub_context *sc);
+bool xfs_repair_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
+ enum xfs_ag_resv_type type);
+int xfs_repair_alloc_ag_block(struct xfs_scrub_context *sc,
+ struct xfs_owner_info *oinfo,
+ xfs_fsblock_t *fsbno, enum xfs_ag_resv_type resv);
+int xfs_repair_init_btblock(struct xfs_scrub_context *sc, xfs_fsblock_t fsb,
+ struct xfs_buf **bpp, xfs_btnum_t btnum,
+ const struct xfs_buf_ops *ops);
+int xfs_repair_fix_freelist(struct xfs_scrub_context *sc, bool can_shrink);
+int xfs_repair_put_freelist(struct xfs_scrub_context *sc, xfs_agblock_t agbno);
+int xfs_repair_collect_btree_extent(struct xfs_scrub_context *sc,
+ struct list_head *btlist,
+ xfs_fsblock_t fsbno, xfs_extlen_t len);
+int xfs_repair_invalidate_blocks(struct xfs_scrub_context *sc,
+ struct list_head *btlist);
+int xfs_repair_reap_btree_extents(struct xfs_scrub_context *sc,
+ struct list_head *btlist,
+ struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type);
+void xfs_repair_cancel_btree_extents(struct xfs_scrub_context *sc,
+ struct list_head *btlist);
+int xfs_repair_subtract_extents(struct xfs_scrub_context *sc,
+ struct list_head *exlist,
+ struct list_head *sublist);
+int xfs_repair_find_ag_btree_roots(struct xfs_scrub_context *sc,
+ struct xfs_buf *agf_bp,
+ struct xfs_repair_find_ag_btree *btree_info,
+ struct xfs_buf *agfl_bp);
+int xfs_repair_reset_counters(struct xfs_mount *mp);
+xfs_extlen_t xfs_repair_calc_ag_resblks(struct xfs_scrub_context *sc);
+int xfs_repair_setup_btree_extent_collection(struct xfs_scrub_context *sc);
+
+/* Metadata repairers */
+
+#endif /* __XFS_SCRUB_REPAIR_H__ */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index cdc8233..9c2372e 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -51,6 +51,7 @@
#include "scrub/trace.h"
#include "scrub/scrub.h"
#include "scrub/btree.h"
+#include "scrub/repair.h"
/*
* Online Scrub and Repair
@@ -199,6 +200,8 @@ xfs_scrub_teardown(
kmem_free(sc->buf);
sc->buf = NULL;
}
+ if (sc->reset_counters && !error)
+ error = xfs_repair_reset_counters(sc->mp);
return error;
}
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 0713eda..70adf0c 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -39,6 +39,14 @@ static inline bool xfs_scrub_should_fix(struct xfs_scrub_metadata *sm)
XFS_SCRUB_OFLAG_PREEN));
}
+/* Are we here only for preening? */
+static inline bool xfs_scrub_preen_only(struct xfs_scrub_metadata *sm)
+{
+ return (sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+ XFS_SCRUB_OFLAG_XCORRUPT |
+ XFS_SCRUB_OFLAG_PREEN)) == XFS_SCRUB_OFLAG_PREEN;
+}
+
/* Buffer pointers and btree cursors for an entire AG. */
struct xfs_scrub_ag {
xfs_agnumber_t agno;
@@ -67,6 +75,7 @@ struct xfs_scrub_context {
void *buf;
uint ilock_flags;
bool try_harder;
+ bool reset_counters;
/* State tracking for single-AG operations. */
struct xfs_scrub_ag sa;
next prev parent reply other threads:[~2017-08-25 22:17 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-25 22:16 [PATCH v9 00/19] xfs: online fs repair support Darrick J. Wong
2017-08-25 22:16 ` [PATCH 01/19] xfs: add helpers to calculate btree size Darrick J. Wong
2017-08-25 22:17 ` [PATCH 02/19] xfs: expose various functions to repair code Darrick J. Wong
2017-08-25 22:17 ` [PATCH 03/19] xfs: add repair helpers for the reverse mapping btree Darrick J. Wong
2017-08-25 22:17 ` [PATCH 04/19] xfs: add repair helpers for the reference count btree Darrick J. Wong
2017-08-25 22:17 ` [PATCH 05/19] xfs: add BMAPI_NORMAP flag to perform block remapping without updating rmpabt Darrick J. Wong
2017-08-25 22:17 ` [PATCH 06/19] xfs: halt auto-reclamation activities while rebuilding rmap Darrick J. Wong
2017-08-25 22:17 ` [PATCH 07/19] xfs: create tracepoints for online repair Darrick J. Wong
2017-08-25 22:17 ` [PATCH 08/19] xfs: implement the metadata repair ioctl flag Darrick J. Wong
2017-08-25 22:17 ` Darrick J. Wong [this message]
2017-08-25 22:17 ` [PATCH 10/19] xfs: repair superblocks Darrick J. Wong
2017-08-25 22:18 ` [PATCH 11/19] xfs: repair the AGF and AGFL Darrick J. Wong
2017-08-25 22:18 ` [PATCH 12/19] xfs: rebuild the AGI Darrick J. Wong
2017-08-25 22:18 ` [PATCH 13/19] xfs: repair free space btrees Darrick J. Wong
2017-08-25 22:18 ` [PATCH 14/19] xfs: repair inode btrees Darrick J. Wong
2017-08-25 22:18 ` [PATCH 15/19] xfs: rebuild the rmapbt Darrick J. Wong
2017-08-25 22:18 ` [PATCH 16/19] xfs: repair refcount btrees Darrick J. Wong
2017-08-25 22:18 ` [PATCH 17/19] xfs: online repair of inodes Darrick J. Wong
2017-08-25 22:18 ` [PATCH 18/19] xfs: repair inode block maps Darrick J. Wong
2017-08-25 22:18 ` [PATCH 19/19] xfs: repair damaged symlinks Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=150369947182.9957.11780536280009861815.stgit@magnolia \
--to=darrick.wong@oracle.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).