From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: sandeen@redhat.com, darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 1/9] xfs_repair: rebuild bmbt from rmapbt data
Date: Fri, 10 Mar 2017 15:24:51 -0800 [thread overview]
Message-ID: <148918829104.8311.15211853599014518835.stgit@birch.djwong.org> (raw)
In-Reply-To: <148918828436.8311.8130426069001200240.stgit@birch.djwong.org>
From: Darrick J. Wong <darrick.wong@oracle.com>
Use rmap records to rebuild corrupt inode forks instead of zapping
the whole inode.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/libxfs_api_defs.h | 2
repair/Makefile | 5 -
repair/dino_chunks.c | 7 +
repair/dinode.c | 41 +++++++
repair/rebuild.c | 277 ++++++++++++++++++++++++++++++++++++++++++++++
repair/rebuild.h | 26 ++++
repair/rmap.c | 2
repair/rmap.h | 1
8 files changed, 357 insertions(+), 4 deletions(-)
create mode 100644 repair/rebuild.c
create mode 100644 repair/rebuild.h
diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index d299b7a..f01fff0 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -146,5 +146,7 @@
#define xfs_rmap_lookup_le_range libxfs_rmap_lookup_le_range
#define xfs_refc_block libxfs_refc_block
#define xfs_rmap_compare libxfs_rmap_compare
+#define xfs_bmbt_calc_size libxfs_bmbt_calc_size
+#define xfs_rmap_query_all libxfs_rmap_query_all
#endif /* __LIBXFS_API_DEFS_H__ */
diff --git a/repair/Makefile b/repair/Makefile
index b7e8fd5..9edaf18 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -11,14 +11,15 @@ LTCOMMAND = xfs_repair
HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h btree.h \
da_util.h dinode.h dir2.h err_protos.h globals.h incore.h protos.h \
- rt.h progress.h scan.h versions.h prefetch.h rmap.h slab.h threads.h
+ rt.h progress.h scan.h versions.h prefetch.h rmap.h slab.h threads.h \
+ rebuild.h
CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c btree.c \
da_util.c dino_chunks.c dinode.c dir2.c globals.c incore.c \
incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
progress.c prefetch.c rmap.c rt.c sb.c scan.c slab.c threads.c \
- versions.c xfs_repair.c
+ versions.c rebuild.c xfs_repair.c
LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBXCMD) $(LIBUUID) \
$(LIBRT) $(LIBPTHREAD) $(LIBBLKID)
diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c
index a3909ac..c479f2c 100644
--- a/repair/dino_chunks.c
+++ b/repair/dino_chunks.c
@@ -697,6 +697,13 @@ process_inode_chunk(
irec_offset += mp->m_sb.sb_inopblock * blks_per_cluster;
agbno += blks_per_cluster;
}
+ /*
+ * Allow the buffer to be re-locked by this thread in case
+ * we want to rebuild an inode fork.
+ */
+ for (bp_index = 0; bp_index < cluster_count; bp_index++)
+ if (bplist[bp_index])
+ bplist[bp_index]->b_flags |= LIBXFS_B_RECURSIVE_LOCK;
agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);
/*
diff --git a/repair/dinode.c b/repair/dinode.c
index d664f87..6f71c2f 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -32,6 +32,7 @@
#include "threads.h"
#include "slab.h"
#include "rmap.h"
+#include "rebuild.h"
/*
* gettext lookups for translations of strings use mutexes internally to
@@ -1915,7 +1916,9 @@ process_inode_data_fork(
xfs_ino_t lino = XFS_AGINO_TO_INO(mp, agno, ino);
int err = 0;
int nex;
+ bool try_rebuild = !rmapbt_suspect;
+retry:
/*
* extent count on disk is only valid for positive values. The kernel
* uses negative values in memory. hence if we see negative numbers
@@ -1961,8 +1964,25 @@ process_inode_data_fork(
if (err) {
do_warn(_("bad data fork in inode %" PRIu64 "\n"), lino);
if (!no_modify) {
+ if (try_rebuild) {
+ do_warn(
+_("rebuilding inode %"PRIu64" data fork\n"),
+ lino);
+ try_rebuild = false;
+ err = rebuild_bmap(mp, lino, XFS_DATA_FORK,
+ be32_to_cpu(dino->di_nextents));
+ if (!err)
+ goto retry;
+ do_warn(
+_("inode %"PRIu64" data fork rebuild failed, error %d\n"),
+ lino, err);
+ }
*dirty += clear_dinode(mp, dino, lino);
ASSERT(*dirty > 0);
+ } else if (try_rebuild) {
+ do_warn(
+_("would have tried to rebuild inode %"PRIu64" data fork, or else\n"),
+ lino);
}
return 1;
}
@@ -2026,7 +2046,9 @@ process_inode_attr_fork(
blkmap_t *ablkmap = NULL;
int repair = 0;
int err;
+ bool try_rebuild = !rmapbt_suspect;
+retry:
if (!XFS_DFORK_Q(dino)) {
*anextents = 0;
if (dino->di_aformat != XFS_DINODE_FMT_EXTENTS) {
@@ -2085,6 +2107,19 @@ process_inode_attr_fork(
do_warn(_("bad attribute fork in inode %" PRIu64), lino);
if (!no_modify) {
+			if (try_rebuild) {
+				/* Only one rebuild attempt is made per call. */
+				try_rebuild = false;
+				do_warn(
+_("rebuilding inode %"PRIu64" attr fork\n"),
+					lino);
+				/*
+				 * This is the attr fork path, so rebuild the
+				 * attr fork and size the block reservation
+				 * from the attr extent count, not from the
+				 * data fork's.
+				 */
+				err = rebuild_bmap(mp, lino, XFS_ATTR_FORK,
+						be16_to_cpu(dino->di_anextents));
+				if (!err)
+					goto retry;
+				do_warn(
+_("inode %"PRIu64" attr fork rebuild failed, error %d\n"),
+					lino, err);
+			}
if (delete_attr_ok) {
do_warn(_(", clearing attr fork\n"));
*dirty += clear_dinode_attr(mp, dino, lino);
@@ -2094,7 +2129,11 @@ process_inode_attr_fork(
*dirty += clear_dinode(mp, dino, lino);
}
ASSERT(*dirty > 0);
- } else {
+ } else if (try_rebuild) {
+ do_warn(
+_("would have tried to rebuild inode %"PRIu64" attr fork or cleared it\n"),
+ lino);
+ } else {
do_warn(_(", would clear attr fork\n"));
}
diff --git a/repair/rebuild.c b/repair/rebuild.c
new file mode 100644
index 0000000..bd5d6a8
--- /dev/null
+++ b/repair/rebuild.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <libxfs.h>
+#include "btree.h"
+#include "err_protos.h"
+#include "libxlog.h"
+#include "incore.h"
+#include "globals.h"
+#include "dinode.h"
+#include "slab.h"
+#include "rmap.h"
+
+/* Borrowed routines from xfs_scrub.c */
+
+struct xfs_repair_bmap_extent { /* one reverse mapping collected for the fork being rebuilt */
+ struct xfs_rmap_irec rmap; /* copy of the rmap record passed to the query callback */
+ xfs_agnumber_t agno; /* AG number taken from the btree cursor that produced the record */
+};
+
+struct xfs_repair_bmap { /* state shared between xfs_repair_bmap() and its rmap query callback */
+ struct xfs_slab *extslab; /* collected struct xfs_repair_bmap_extent records */
+ xfs_ino_t ino; /* only records whose rm_owner equals this inode number are considered */
+ xfs_rfsblock_t bmbt_blocks; /* sum of rm_blockcount over matching XFS_RMAP_BMBT_BLOCK records */
+ int whichfork; /* XFS_DATA_FORK or XFS_ATTR_FORK */
+};
+
+/*
+ * rmap query callback: remember every record that maps file blocks of the
+ * inode fork described by @priv (a struct xfs_repair_bmap).
+ *
+ * Records with a different owner, or belonging to the other fork, are
+ * ignored.  Records flagged XFS_RMAP_BMBT_BLOCK are not stored; their length
+ * is only added to the bmbt_blocks tally.  Everything else is copied into the
+ * extent slab together with the AG number from the cursor.
+ *
+ * Returns 0 for skipped or tallied records, otherwise whatever slab_add()
+ * returns.
+ */
+STATIC int
+xfs_repair_bmap_extent_fn(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
+{
+	struct xfs_repair_bmap		*state = priv;
+	struct xfs_repair_bmap_extent	ext;
+
+	/* Not our inode. */
+	if (rec->rm_owner != state->ino)
+		return 0;
+
+	/* Our inode, but the record must also be for the requested fork. */
+	switch (state->whichfork) {
+	case XFS_DATA_FORK:
+		if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
+			return 0;
+		break;
+	case XFS_ATTR_FORK:
+		if (!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
+			return 0;
+		break;
+	default:
+		break;
+	}
+
+	/* Mapping-btree blocks are counted, never re-added as extents. */
+	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
+		state->bmbt_blocks += rec->rm_blockcount;
+		return 0;
+	}
+
+	ext.agno = cur->bc_private.a.agno;
+	ext.rmap = *rec;
+	return slab_add(state->extslab, &ext);
+}
+
+/*
+ * Ordering function for the extent slab cursor: sorts collected extents by
+ * ascending rm_offset.  Returns -1, 0 or 1.
+ */
+static int
+xfs_repair_bmap_extent_cmp(
+	const void				*a,
+	const void				*b)
+{
+	const struct xfs_repair_bmap_extent	*lhs = a;
+	const struct xfs_repair_bmap_extent	*rhs = b;
+
+	if (lhs->rmap.rm_offset == rhs->rmap.rm_offset)
+		return 0;
+	return lhs->rmap.rm_offset < rhs->rmap.rm_offset ? -1 : 1;
+}
+
+/*
+ * Rebuild one fork of an inode from the reverse-mapping btrees.
+ *
+ * The rmapbt of every AG is scanned for records owned by @ip in @whichfork.
+ * The existing fork is then destroyed and reset to an empty extents-format
+ * fork, and each collected record is mapped back in with
+ * libxfs_bmapi_write(), in pieces of at most MAXEXTLEN blocks.
+ *
+ * @ip:		inode whose fork is rebuilt
+ * @tpp:	in/out transaction pointer; the transaction is rolled several
+ *		times, so callers must use *tpp afterwards
+ * @whichfork:	XFS_DATA_FORK or XFS_ATTR_FORK
+ *
+ * Returns 0 on success or a nonzero error code.  The precondition checks
+ * return positive errno values; results of libxfs calls are negated before
+ * being returned.  If a failure happens after the old fork has been
+ * destroyed, the fork is left only partially rebuilt.
+ */
+STATIC int
+xfs_repair_bmap(
+	struct xfs_inode		*ip,
+	struct xfs_trans		**tpp,
+	int				whichfork)
+{
+	struct xfs_repair_bmap		rb = {0};
+	struct xfs_bmbt_irec		bmap;
+	struct xfs_defer_ops		dfops;
+	struct xfs_mount		*mp = ip->i_mount;
+	struct xfs_buf			*agf_bp = NULL;
+	struct xfs_repair_bmap_extent	*rbe;
+	struct xfs_btree_cur		*cur;
+	struct xfs_slab_cursor		*scur = NULL;
+	xfs_fsblock_t			firstfsb;
+	xfs_agnumber_t			agno;
+	xfs_extlen_t			extlen;
+	int				baseflags;
+	int				flags;
+	int				nimaps;
+	int				error = 0;
+
+	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);
+
+	/* Don't know how to repair the other fork formats. */
+	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+		return ENOTTY;
+
+	/* Only files, symlinks, and directories get to have data forks. */
+	if (whichfork == XFS_DATA_FORK && !S_ISREG(VFS_I(ip)->i_mode) &&
+	    !S_ISDIR(VFS_I(ip)->i_mode) && !S_ISLNK(VFS_I(ip)->i_mode))
+		return EINVAL;
+
+	/* If we somehow have delalloc extents, forget it. */
+	if (whichfork == XFS_DATA_FORK && ip->i_delayed_blks)
+		return EBUSY;
+
+	/* We require the rmapbt to rebuild anything. */
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return EOPNOTSUPP;
+
+	/* Don't know how to rebuild realtime data forks. */
+	if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK)
+		return EOPNOTSUPP;
+
+	/*
+	 * Set up the slab that will hold the collected extents.  Bail out if
+	 * it cannot be created instead of handing a NULL slab to slab_add().
+	 *
+	 * NOTE(review): the negation assumes the slab helpers return negative
+	 * errnos, which is how the slab_add() result coming back through the
+	 * rmap query below is treated -- confirm against slab.c.
+	 */
+	error = -init_slab(&rb.extslab, sizeof(*rbe));
+	if (error)
+		goto out;
+
+	/* Collect all reverse mappings for this fork's extents. */
+	rb.ino = ip->i_ino;
+	rb.whichfork = whichfork;
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		error = -libxfs_alloc_read_agf(mp, *tpp, agno, 0, &agf_bp);
+		if (error)
+			goto out;
+		cur = libxfs_rmapbt_init_cursor(mp, *tpp, agf_bp, agno);
+		error = -libxfs_rmap_query_all(cur, xfs_repair_bmap_extent_fn, &rb);
+		libxfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR :
+				XFS_BTREE_NOERROR);
+		if (error)
+			goto out;
+	}
+
+	/* Blow out the in-core fork and zero the on-disk fork. */
+	libxfs_trans_ijoin(*tpp, ip, 0);
+	if (XFS_IFORK_PTR(ip, whichfork) != NULL)
+		libxfs_idestroy_fork(ip, whichfork);
+	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+	XFS_IFORK_NEXT_SET(ip, whichfork, 0);
+
+	/* Reinitialize the on-disk fork. */
+	if (whichfork == XFS_DATA_FORK) {
+		memset(&ip->i_df, 0, sizeof(struct xfs_ifork));
+		ip->i_df.if_flags |= XFS_IFEXTENTS;
+	} else if (whichfork == XFS_ATTR_FORK) {
+		/* No attr extents were found, so the inode gets no attr fork. */
+		if (slab_count(rb.extslab) == 0)
+			ip->i_afp = NULL;
+		else {
+			ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_NOFS);
+			ip->i_afp->if_flags |= XFS_IFEXTENTS;
+		}
+	}
+	libxfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
+	error = -libxfs_trans_roll(tpp, ip);
+	if (error)
+		goto out;
+
+	baseflags = XFS_BMAPI_REMAP | XFS_BMAPI_NORMAP;
+	if (whichfork == XFS_ATTR_FORK)
+		baseflags |= XFS_BMAPI_ATTRFORK;
+
+	/*
+	 * "Remap" the extents into the fork, walking them in ascending file
+	 * offset order.  The cursor must exist before it can be popped.
+	 */
+	error = -init_slab_cursor(rb.extslab, xfs_repair_bmap_extent_cmp, &scur);
+	if (error)
+		goto out;
+	rbe = pop_slab_cursor(scur);
+	while (rbe != NULL) {
+		/* Form the "new" mapping... */
+		bmap.br_startblock = XFS_AGB_TO_FSB(mp, rbe->agno,
+				rbe->rmap.rm_startblock);
+		bmap.br_startoff = rbe->rmap.rm_offset;
+		flags = 0;
+		if (rbe->rmap.rm_flags & XFS_RMAP_UNWRITTEN)
+			flags = XFS_BMAPI_PREALLOC;
+
+		/* One rmap record may be longer than a single bmap extent. */
+		while (rbe->rmap.rm_blockcount > 0) {
+			libxfs_defer_init(&dfops, &firstfsb);
+			extlen = min(rbe->rmap.rm_blockcount, MAXEXTLEN);
+			bmap.br_blockcount = extlen;
+
+			/*
+			 * Drop the block counter...
+			 * (presumably the remap below adds extlen back to
+			 * di_nblocks -- TODO confirm)
+			 */
+			ip->i_d.di_nblocks -= extlen;
+
+			/* Re-add the extent to the fork. */
+			nimaps = 1;
+			firstfsb = bmap.br_startblock;
+			error = -libxfs_bmapi_write(*tpp, ip,
+					bmap.br_startoff,
+					extlen, baseflags | flags, &firstfsb,
+					extlen, &bmap, &nimaps,
+					&dfops);
+			if (error)
+				goto out;
+
+			/* Advance to the next piece of this record. */
+			bmap.br_startblock += extlen;
+			bmap.br_startoff += extlen;
+			rbe->rmap.rm_blockcount -= extlen;
+			error = -libxfs_defer_finish(tpp, &dfops, ip);
+			if (error)
+				goto out;
+			/* Make sure we roll the transaction. */
+			error = -libxfs_trans_roll(tpp, ip);
+			if (error)
+				goto out;
+		}
+		rbe = pop_slab_cursor(scur);
+	}
+
+	/* Decrease nblocks to reflect the freed bmbt blocks. */
+	if (rb.bmbt_blocks) {
+		ip->i_d.di_nblocks -= rb.bmbt_blocks;
+		libxfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
+		error = -libxfs_trans_roll(tpp, ip);
+	}
+
+	/* Success and failure share the same cleanup. */
+out:
+	if (scur)
+		free_slab_cursor(&scur);
+	if (rb.extslab)
+		free_slab(&rb.extslab);
+	return error;
+}
+
+/*
+ * Rebuild one fork of inode @ino from rmap data.
+ *
+ * A tr_itruncate transaction is allocated with a block reservation of
+ * libxfs_bmbt_calc_size(mp, nr_extents), the inode is looked up, and
+ * xfs_repair_bmap() does the rebuild.  The transaction is committed on
+ * success and cancelled on failure; the inode reference is dropped in both
+ * cases.
+ *
+ * @mp:		mount the inode lives on
+ * @ino:	inode number
+ * @whichfork:	fork selector handed to xfs_repair_bmap()
+ * @nr_extents:	extent count used to size the block reservation
+ *
+ * Returns 0 on success or a nonzero error code.
+ */
+int
+rebuild_bmap(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino,
+	int			whichfork,
+	unsigned long		nr_extents)
+{
+	struct xfs_trans	*tp;
+	struct xfs_inode	*ip;
+	unsigned long long	bmbt_blks;
+	int			error;
+
+	bmbt_blks = libxfs_bmbt_calc_size(mp, nr_extents);
+	error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+			bmbt_blks, 0, 0, &tp);
+	if (error)
+		return error;
+
+	error = -libxfs_iget(mp, NULL, ino, 0, &ip);
+	if (error) {
+		libxfs_trans_cancel(tp);
+		return error;
+	}
+
+	error = xfs_repair_bmap(ip, &tp, whichfork);
+	if (error) {
+		/* Drop the inode first, then throw the transaction away. */
+		IRELE(ip);
+		libxfs_trans_cancel(tp);
+		return error;
+	}
+
+	error = -libxfs_trans_commit(tp);
+	IRELE(ip);
+	return error;
+}
diff --git a/repair/rebuild.h b/repair/rebuild.h
new file mode 100644
index 0000000..51a44ea
--- /dev/null
+++ b/repair/rebuild.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef REBUILD_H_
+#define REBUILD_H_
+
+int rebuild_bmap(struct xfs_mount *mp, xfs_ino_t ino, int whichfork,
+ unsigned long nr_extents); /* rebuild one fork of inode ino; 0 on success, nonzero error code on failure */
+
+#endif /* REBUILD_H_ */
diff --git a/repair/rmap.c b/repair/rmap.c
index ab6e583..af37829 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -46,7 +46,7 @@ struct xfs_ag_rmap {
};
static struct xfs_ag_rmap *ag_rmaps;
-static bool rmapbt_suspect;
+bool rmapbt_suspect;
static bool refcbt_suspect;
static inline int rmap_compare(const void *a, const void *b)
diff --git a/repair/rmap.h b/repair/rmap.h
index 752ece8..c970942 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -21,6 +21,7 @@
#define RMAP_H_
extern bool collect_rmaps;
+extern bool rmapbt_suspect;
extern bool rmap_needs_work(struct xfs_mount *);
next prev parent reply other threads:[~2017-03-10 23:24 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-03-10 23:24 [PATCH v6 0/9] xfsprogs: online scrub/repair support Darrick J. Wong
2017-03-10 23:24 ` Darrick J. Wong [this message]
2017-03-10 23:24 ` [PATCH 2/9] xfs_db: introduce fuzz command Darrick J. Wong
2017-03-10 23:25 ` [PATCH 3/9] xfs_db: print attribute remote value blocks Darrick J. Wong
2017-03-10 23:25 ` [PATCH 4/9] xfs_db: write / fuzz bad values into dir/attr blocks with good CRCs Darrick J. Wong
2017-03-10 23:25 ` [PATCH 5/9] xfs_io: provide an interface to the scrub ioctls Darrick J. Wong
2017-03-10 23:25 ` [PATCH 6/9] xfs_scrub: create online filesystem scrub program Darrick J. Wong
2017-03-10 23:25 ` [PATCH 7/9] xfs_scrub: add XFS-specific scrubbing functionality Darrick J. Wong
2017-03-10 23:25 ` [PATCH 8/9] xfs_scrub: create a script to scrub all xfs filesystems Darrick J. Wong
2017-03-10 23:25 ` [PATCH 9/9] xfs_scrub: integrate services with systemd Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=148918829104.8311.15211853599014518835.stgit@birch.djwong.org \
--to=darrick.wong@oracle.com \
--cc=linux-xfs@vger.kernel.org \
--cc=sandeen@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).