* [PATCH v2 0/9] xfs_repair: use btree bulk loading
@ 2020-01-01  1:21 Darrick J. Wong
From: Darrick J. Wong @ 2020-01-01  1:21 UTC
  To: sandeen, darrick.wong; +Cc: linux-xfs

Hi all,

Refactor xfs_repair to use the btree bulk loading code instead of the
open-coded versions in phase5.c.  This isn't a full submission; it's
merely a demonstration of how the kernel patches can be used in
userspace.
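
The heart of the series is the two-step bulk loading pattern from the
kernel: compute the new btree's geometry from the record count, reserve
that many blocks, then load the records through a pair of callbacks.
Roughly (a condensed sketch; error handling, the reservation loop, and
the cursor re-staging are elided, but the libxfs calls are the ones
patch 4 actually uses):

	struct xfs_btree_cur	*cur;
	int			error;

	/* Stage a cursor against a fake root; nothing touches the AGF yet. */
	cur = libxfs_allocbt_stage_cursor(mp, tp, &btr->newbt.afake,
			agno, XFS_BTNUM_BNO);

	/* Compute how many blocks a tree of nr_recs records needs. */
	error = -libxfs_btree_bload_compute_geometry(cur, &btr->bload,
			nr_recs);
	libxfs_btree_del_cursor(cur, error);

	/* ...reserve bload.nr_blocks blocks and stage a fresh cursor... */

	/* Load the records through the get_data/alloc_block callbacks. */
	error = -libxfs_btree_bload(cur, &btr->bload, btr);
	libxfs_btree_del_cursor(cur, error);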

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=repair-bulk-load


* [PATCH 1/9] xfs_repair: port the online repair newbt structure
@ 2020-01-01  1:21 ` Darrick J. Wong
From: Darrick J. Wong @ 2020-01-01  1:21 UTC
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Port the new btree staging context and related block reservation helper
code from the kernel to repair.  We'll use this in subsequent patches to
implement btree bulk loading.
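
In rough outline, a btree rebuild drives this API as follows (a sketch
only; the repair_ctx setup, error handling, and the btree cursor come
from the caller, and alloc_hint/nr_blocks are illustrative names):

	struct xrep_newbt	xnr;
	union xfs_btree_ptr	ptr;
	int			error;

	/* Set up accounting for a new AG btree owned by XFS_RMAP_OWN_AG. */
	xrep_newbt_init_ag(&xnr, sc, &XFS_RMAP_OINFO_AG, alloc_hint,
			XFS_AG_RESV_NONE);

	/* Reserve however many blocks the new btree will need. */
	error = xrep_newbt_reserve_space(&xnr, nr_blocks);

	/* The bulk loader claims the reserved blocks one at a time... */
	error = xrep_newbt_alloc_block(cur, &xnr, &ptr);

	/* ...and teardown frees anything we reserved but didn't use. */
	xrep_newbt_destroy(&xnr, error);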

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/libxfs_api_defs.h |    2 
 repair/Makefile          |    4 -
 repair/bload.c           |  276 ++++++++++++++++++++++++++++++++++++++++++++++
 repair/bload.h           |   79 +++++++++++++
 repair/xfs_repair.c      |   17 +++
 5 files changed, 376 insertions(+), 2 deletions(-)
 create mode 100644 repair/bload.c
 create mode 100644 repair/bload.h


diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 355f99a2..6bab5a70 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -50,6 +50,8 @@
 #define xfs_attr_remove			libxfs_attr_remove
 #define xfs_attr_leaf_newentsize	libxfs_attr_leaf_newentsize
 
+#define xfs_alloc_vextent		libxfs_alloc_vextent
+#define __xfs_bmap_add_free		__libxfs_bmap_add_free
 #define xfs_agfl_walk			libxfs_agfl_walk
 #define xfs_alloc_fix_freelist		libxfs_alloc_fix_freelist
 #define xfs_alloc_min_freelist		libxfs_alloc_min_freelist
diff --git a/repair/Makefile b/repair/Makefile
index 0964499a..8cc1ee68 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -9,11 +9,11 @@ LSRCFILES = README
 
 LTCOMMAND = xfs_repair
 
-HFILES = agheader.h attr_repair.h avl.h bmap.h btree.h \
+HFILES = agheader.h attr_repair.h avl.h bload.h bmap.h btree.h \
 	da_util.h dinode.h dir2.h err_protos.h globals.h incore.h protos.h \
 	rt.h progress.h scan.h versions.h prefetch.h rmap.h slab.h threads.h
 
-CFILES = agheader.c attr_repair.c avl.c bmap.c btree.c \
+CFILES = agheader.c attr_repair.c avl.c bload.c bmap.c btree.c \
 	da_util.c dino_chunks.c dinode.c dir2.c globals.c incore.c \
 	incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
 	phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
diff --git a/repair/bload.c b/repair/bload.c
new file mode 100644
index 00000000..896e2ae6
--- /dev/null
+++ b/repair/bload.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include <libxfs.h>
+#include "bload.h"
+
+#define trace_xrep_newbt_alloc_block(...)	((void) 0)
+#define trace_xrep_newbt_reserve_space(...)	((void) 0)
+#define trace_xrep_newbt_unreserve_space(...)	((void) 0)
+
+int bload_leaf_slack = -1;
+int bload_node_slack = -1;
+
+/* Ported routines from fs/xfs/scrub/repair.c */
+
+/*
+ * Roll a transaction, keeping the AG headers locked and reinitializing
+ * the btree cursors.
+ */
+int
+xrep_roll_ag_trans(
+	struct repair_ctx	*sc)
+{
+	int			error;
+
+	/* Keep the AG header buffers locked so we can keep going. */
+	if (sc->agi_bp)
+		libxfs_trans_bhold(sc->tp, sc->agi_bp);
+	if (sc->agf_bp)
+		libxfs_trans_bhold(sc->tp, sc->agf_bp);
+	if (sc->agfl_bp)
+		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
+
+	/*
+	 * Roll the transaction.  We still own the buffer and the buffer lock
+	 * regardless of whether or not the roll succeeds.  If the roll fails,
+	 * the buffers will be released during teardown on our way out of the
+	 * kernel.  If it succeeds, we join them to the new transaction and
+	 * move on.
+	 */
+	error = -libxfs_trans_roll(&sc->tp);
+	if (error)
+		return error;
+
+	/* Join AG headers to the new transaction. */
+	if (sc->agi_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
+	if (sc->agf_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
+	if (sc->agfl_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
+
+	return 0;
+}
+
+/* Initialize accounting resources for staging a new AG btree. */
+void
+xrep_newbt_init_ag(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc,
+	const struct xfs_owner_info	*oinfo,
+	xfs_fsblock_t			alloc_hint,
+	enum xfs_ag_resv_type		resv)
+{
+	memset(xnr, 0, sizeof(struct xrep_newbt));
+	xnr->sc = sc;
+	xnr->oinfo = *oinfo; /* structure copy */
+	xnr->alloc_hint = alloc_hint;
+	xnr->resv = resv;
+	INIT_LIST_HEAD(&xnr->reservations);
+}
+
+/* Initialize accounting resources for staging a new inode fork btree. */
+void
+xrep_newbt_init_inode(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc,
+	int				whichfork,
+	const struct xfs_owner_info	*oinfo)
+{
+	memset(xnr, 0, sizeof(struct xrep_newbt));
+	xnr->sc = sc;
+	xnr->oinfo = *oinfo; /* structure copy */
+	xnr->alloc_hint = XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino);
+	xnr->resv = XFS_AG_RESV_NONE;
+	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
+	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
+	INIT_LIST_HEAD(&xnr->reservations);
+}
+
+/*
+ * Initialize accounting resources for staging a new btree.  Callers are
+ * expected to add their own reservations (and clean them up) manually.
+ */
+void
+xrep_newbt_init_bare(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc)
+{
+	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
+			XFS_AG_RESV_NONE);
+}
+
+/* Add a space reservation manually. */
+int
+xrep_newbt_add_reservation(
+	struct xrep_newbt		*xnr,
+	xfs_fsblock_t			fsbno,
+	xfs_extlen_t			len,
+	void				*priv)
+{
+	struct xrep_newbt_resv	*resv;
+
+	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
+	if (!resv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&resv->list);
+	resv->fsbno = fsbno;
+	resv->len = len;
+	resv->used = 0;
+	resv->priv = priv;
+	list_add_tail(&resv->list, &xnr->reservations);
+	return 0;
+}
+
+/* Reserve disk space for our new btree. */
+int
+xrep_newbt_reserve_space(
+	struct xrep_newbt	*xnr,
+	uint64_t		nr_blocks)
+{
+	struct repair_ctx	*sc = xnr->sc;
+	xfs_alloctype_t		type;
+	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
+	int			error = 0;
+
+	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
+
+	while (nr_blocks > 0 && !error) {
+		struct xfs_alloc_arg	args = {
+			.tp		= sc->tp,
+			.mp		= sc->mp,
+			.type		= type,
+			.fsbno		= alloc_hint,
+			.oinfo		= xnr->oinfo,
+			.minlen		= 1,
+			.maxlen		= nr_blocks,
+			.prod		= nr_blocks,
+			.resv		= xnr->resv,
+		};
+
+		error = -libxfs_alloc_vextent(&args);
+		if (error)
+			return error;
+		if (args.fsbno == NULLFSBLOCK)
+			return -ENOSPC;
+
+		trace_xrep_newbt_reserve_space(sc->mp,
+				XFS_FSB_TO_AGNO(sc->mp, args.fsbno),
+				XFS_FSB_TO_AGBNO(sc->mp, args.fsbno),
+				args.len, xnr->oinfo.oi_owner);
+
+		/* We don't have real EFIs here so skip that. */
+
+		error = xrep_newbt_add_reservation(xnr, args.fsbno, args.len,
+				NULL);
+		if (error)
+			break;
+
+		nr_blocks -= args.len;
+		alloc_hint = args.fsbno + args.len - 1;
+
+		if (sc->ip)
+			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
+		else
+			error = xrep_roll_ag_trans(sc);
+	}
+
+	return error;
+}
+
+/* Free all the accounting info and disk space we reserved for a new btree. */
+void
+xrep_newbt_destroy(
+	struct xrep_newbt	*xnr,
+	int			error)
+{
+	struct repair_ctx	*sc = xnr->sc;
+	struct xrep_newbt_resv	*resv, *n;
+
+	if (error)
+		goto junkit;
+
+	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
+		/* We don't have EFIs here so skip the EFD. */
+
+		/* Free every block we didn't use. */
+		resv->fsbno += resv->used;
+		resv->len -= resv->used;
+		resv->used = 0;
+
+		if (resv->len > 0) {
+			trace_xrep_newbt_unreserve_space(sc->mp,
+					XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
+					XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
+					resv->len, xnr->oinfo.oi_owner);
+
+			__libxfs_bmap_add_free(sc->tp, resv->fsbno, resv->len,
+					&xnr->oinfo, true);
+		}
+
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
+
+junkit:
+	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
+
+	if (sc->ip) {
+		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
+		xnr->ifake.if_fork = NULL;
+	}
+}
+
+/* Feed one of the reserved btree blocks to the bulk loader. */
+int
+xrep_newbt_alloc_block(
+	struct xfs_btree_cur	*cur,
+	struct xrep_newbt	*xnr,
+	union xfs_btree_ptr	*ptr)
+{
+	struct xrep_newbt_resv	*resv;
+	xfs_fsblock_t		fsb;
+
+	/*
+	 * If last_resv doesn't have a block for us, move forward until we find
+	 * one that does (or run out of reservations).
+	 */
+	if (xnr->last_resv == NULL) {
+		list_for_each_entry(resv, &xnr->reservations, list) {
+			if (resv->used < resv->len) {
+				xnr->last_resv = resv;
+				break;
+			}
+		}
+		if (xnr->last_resv == NULL)
+			return -ENOSPC;
+	} else if (xnr->last_resv->used == xnr->last_resv->len) {
+		if (xnr->last_resv->list.next == &xnr->reservations)
+			return -ENOSPC;
+		xnr->last_resv = list_entry(xnr->last_resv->list.next,
+				struct xrep_newbt_resv, list);
+	}
+
+	/* Nab the block. */
+	fsb = xnr->last_resv->fsbno + xnr->last_resv->used;
+	xnr->last_resv->used++;
+
+	trace_xrep_newbt_alloc_block(cur->bc_mp,
+			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
+			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
+			xnr->oinfo.oi_owner);
+
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		ptr->l = cpu_to_be64(fsb);
+	else
+		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
+	return 0;
+}
diff --git a/repair/bload.h b/repair/bload.h
new file mode 100644
index 00000000..8f890157
--- /dev/null
+++ b/repair/bload.h
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_REPAIR_BLOAD_H__
+#define __XFS_REPAIR_BLOAD_H__
+
+extern int bload_leaf_slack;
+extern int bload_node_slack;
+
+struct repair_ctx {
+	struct xfs_mount	*mp;
+	struct xfs_inode	*ip;
+	struct xfs_trans	*tp;
+
+	struct xfs_buf		*agi_bp;
+	struct xfs_buf		*agf_bp;
+	struct xfs_buf		*agfl_bp;
+};
+
+struct xrep_newbt_resv {
+	/* Link to list of extents that we've reserved. */
+	struct list_head	list;
+
+	void			*priv;
+
+	/* FSB of the block we reserved. */
+	xfs_fsblock_t		fsbno;
+
+	/* Length of the reservation. */
+	xfs_extlen_t		len;
+
+	/* How much of this reservation we've used. */
+	xfs_extlen_t		used;
+};
+
+struct xrep_newbt {
+	struct repair_ctx	*sc;
+
+	/* List of extents that we've reserved. */
+	struct list_head	reservations;
+
+	/* Fake root for new btree. */
+	union {
+		struct xbtree_afakeroot	afake;
+		struct xbtree_ifakeroot	ifake;
+	};
+
+	/* rmap owner of these blocks */
+	struct xfs_owner_info	oinfo;
+
+	/* The last reservation we allocated from. */
+	struct xrep_newbt_resv	*last_resv;
+
+	/* Allocation hint */
+	xfs_fsblock_t		alloc_hint;
+
+	/* per-ag reservation type */
+	enum xfs_ag_resv_type	resv;
+};
+
+#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
+	list_for_each_entry_safe((resv), (n), &(xnr)->reservations, list)
+
+void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct repair_ctx *sc);
+void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct repair_ctx *sc,
+		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
+		enum xfs_ag_resv_type resv);
+void xrep_newbt_init_inode(struct xrep_newbt *xnr, struct repair_ctx *sc,
+		int whichfork, const struct xfs_owner_info *oinfo);
+int xrep_newbt_add_reservation(struct xrep_newbt *xnr, xfs_fsblock_t fsbno,
+		xfs_extlen_t len, void *priv);
+int xrep_newbt_reserve_space(struct xrep_newbt *xnr, uint64_t nr_blocks);
+void xrep_newbt_destroy(struct xrep_newbt *xnr, int error);
+int xrep_newbt_alloc_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
+		union xfs_btree_ptr *ptr);
+
+#endif /* __XFS_REPAIR_BLOAD_H__ */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 66e2c335..ecfb1ff1 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -24,6 +24,7 @@
 #include "rmap.h"
 #include "libfrog/fsgeom.h"
 #include "libfrog/platform.h"
+#include "bload.h"
 
 /*
  * option tables for getsubopt calls
@@ -39,6 +40,8 @@ enum o_opt_nums {
 	AG_STRIDE,
 	FORCE_GEO,
 	PHASE2_THREADS,
+	BLOAD_LEAF_SLACK,
+	BLOAD_NODE_SLACK,
 	O_MAX_OPTS,
 };
 
@@ -49,6 +52,8 @@ static char *o_opts[] = {
 	[AG_STRIDE]		= "ag_stride",
 	[FORCE_GEO]		= "force_geometry",
 	[PHASE2_THREADS]	= "phase2_threads",
+	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
+	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
 	[O_MAX_OPTS]		= NULL,
 };
 
@@ -260,6 +265,18 @@ process_args(int argc, char **argv)
 		_("-o phase2_threads requires a parameter\n"));
 					phase2_threads = (int)strtol(val, NULL, 0);
 					break;
+				case BLOAD_LEAF_SLACK:
+					if (!val)
+						do_abort(
+		_("-o debug_bload_leaf_slack requires a parameter\n"));
+					bload_leaf_slack = (int)strtol(val, NULL, 0);
+					break;
+				case BLOAD_NODE_SLACK:
+					if (!val)
+						do_abort(
+		_("-o debug_bload_node_slack requires a parameter\n"));
+					bload_node_slack = (int)strtol(val, NULL, 0);
+					break;
 				default:
 					unknown('o', val);
 					break;



* [PATCH 2/9] xfs_repair: unindent phase 5 function
@ 2020-01-01  1:21 ` Darrick J. Wong
From: Darrick J. Wong @ 2020-01-01  1:21 UTC
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Remove the unnecessary indent in phase5_func.  No functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |  309 +++++++++++++++++++++++++++----------------------------
 1 file changed, 154 insertions(+), 155 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index 7f7d3d18..4108e22b 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -2237,201 +2237,200 @@ phase5_func(
 	if (verbose)
 		do_log(_("        - agno = %d\n"), agno);
 
-	{
-		/*
-		 * build up incore bno and bcnt extent btrees
-		 */
-		num_extents = mk_incore_fstree(mp, agno);
+	/*
+	 * build up incore bno and bcnt extent btrees
+	 */
+	num_extents = mk_incore_fstree(mp, agno);
 
 #ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, "# of bno extents is %d\n",
-				count_bno_extents(agno));
+	fprintf(stderr, "# of bno extents is %d\n",
+			count_bno_extents(agno));
 #endif
 
-		if (num_extents == 0)  {
-			/*
-			 * XXX - what we probably should do here is pick an
-			 * inode for a regular file in the allocation group
-			 * that has space allocated and shoot it by traversing
-			 * the bmap list and putting all its extents on the
-			 * incore freespace trees, clearing the inode,
-			 * and clearing the in-use bit in the incore inode
-			 * tree.  Then try mk_incore_fstree() again.
-			 */
-			do_error(_("unable to rebuild AG %u.  "
-				  "Not enough free space in on-disk AG.\n"),
-				agno);
-		}
-
+	if (num_extents == 0)  {
 		/*
-		 * ok, now set up the btree cursors for the
-		 * on-disk btrees (includes pre-allocating all
-		 * required blocks for the trees themselves)
+		 * XXX - what we probably should do here is pick an
+		 * inode for a regular file in the allocation group
+		 * that has space allocated and shoot it by traversing
+		 * the bmap list and putting all its extents on the
+		 * incore freespace trees, clearing the inode,
+		 * and clearing the in-use bit in the incore inode
+		 * tree.  Then try mk_incore_fstree() again.
 		 */
-		init_ino_cursor(mp, agno, &ino_btree_curs, &num_inos,
-				&num_free_inos, 0);
+		do_error(_("unable to rebuild AG %u.  "
+			  "Not enough free space in on-disk AG.\n"),
+			agno);
+	}
 
-		if (xfs_sb_version_hasfinobt(&mp->m_sb))
-			init_ino_cursor(mp, agno, &fino_btree_curs,
-					&finobt_num_inos, &finobt_num_free_inos,
-					1);
+	/*
+	 * ok, now set up the btree cursors for the
+	 * on-disk btrees (includes pre-allocating all
+	 * required blocks for the trees themselves)
+	 */
+	init_ino_cursor(mp, agno, &ino_btree_curs, &num_inos,
+			&num_free_inos, 0);
 
-		sb_icount_ag[agno] += num_inos;
-		sb_ifree_ag[agno] += num_free_inos;
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		init_ino_cursor(mp, agno, &fino_btree_curs,
+				&finobt_num_inos, &finobt_num_free_inos,
+				1);
 
-		/*
-		 * Set up the btree cursors for the on-disk rmap btrees,
-		 * which includes pre-allocating all required blocks.
-		 */
-		init_rmapbt_cursor(mp, agno, &rmap_btree_curs);
+	sb_icount_ag[agno] += num_inos;
+	sb_ifree_ag[agno] += num_free_inos;
 
-		/*
-		 * Set up the btree cursors for the on-disk refcount btrees,
-		 * which includes pre-allocating all required blocks.
-		 */
-		init_refc_cursor(mp, agno, &refcnt_btree_curs);
+	/*
+	 * Set up the btree cursors for the on-disk rmap btrees,
+	 * which includes pre-allocating all required blocks.
+	 */
+	init_rmapbt_cursor(mp, agno, &rmap_btree_curs);
 
-		num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
+	/*
+	 * Set up the btree cursors for the on-disk refcount btrees,
+	 * which includes pre-allocating all required blocks.
+	 */
+	init_refc_cursor(mp, agno, &refcnt_btree_curs);
+
+	num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
+	/*
+	 * lose two blocks per AG -- the space tree roots
+	 * are counted as allocated since the space trees
+	 * always have roots
+	 */
+	sb_fdblocks_ag[agno] += num_freeblocks - 2;
+
+	if (num_extents == 0)  {
 		/*
-		 * lose two blocks per AG -- the space tree roots
-		 * are counted as allocated since the space trees
-		 * always have roots
+		 * XXX - what we probably should do here is pick an
+		 * inode for a regular file in the allocation group
+		 * that has space allocated and shoot it by traversing
+		 * the bmap list and putting all its extents on the
+		 * incore freespace trees, clearing the inode,
+		 * and clearing the in-use bit in the incore inode
+		 * tree.  Then try mk_incore_fstree() again.
 		 */
-		sb_fdblocks_ag[agno] += num_freeblocks - 2;
-
-		if (num_extents == 0)  {
-			/*
-			 * XXX - what we probably should do here is pick an
-			 * inode for a regular file in the allocation group
-			 * that has space allocated and shoot it by traversing
-			 * the bmap list and putting all its extents on the
-			 * incore freespace trees, clearing the inode,
-			 * and clearing the in-use bit in the incore inode
-			 * tree.  Then try mk_incore_fstree() again.
-			 */
-			do_error(
-			_("unable to rebuild AG %u.  No free space.\n"), agno);
-		}
+		do_error(
+		_("unable to rebuild AG %u.  No free space.\n"), agno);
+	}
 
 #ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, "# of bno extents is %d\n", num_extents);
+	fprintf(stderr, "# of bno extents is %d\n", num_extents);
 #endif
 
-		/*
-		 * track blocks that we might really lose
-		 */
-		extra_blocks = calculate_freespace_cursor(mp, agno,
-					&num_extents, &bno_btree_curs);
+	/*
+	 * track blocks that we might really lose
+	 */
+	extra_blocks = calculate_freespace_cursor(mp, agno,
+				&num_extents, &bno_btree_curs);
 
-		/*
-		 * freespace btrees live in the "free space" but
-		 * the filesystem treats AGFL blocks as allocated
-		 * since they aren't described by the freespace trees
-		 */
+	/*
+	 * freespace btrees live in the "free space" but
+	 * the filesystem treats AGFL blocks as allocated
+	 * since they aren't described by the freespace trees
+	 */
 
-		/*
-		 * see if we can fit all the extra blocks into the AGFL
-		 */
-		extra_blocks = (extra_blocks - libxfs_agfl_size(mp) > 0)
-				? extra_blocks - libxfs_agfl_size(mp)
-				: 0;
+	/*
+	 * see if we can fit all the extra blocks into the AGFL
+	 */
+	extra_blocks = (extra_blocks - libxfs_agfl_size(mp) > 0)
+			? extra_blocks - libxfs_agfl_size(mp)
+			: 0;
 
-		if (extra_blocks > 0)
-			sb_fdblocks_ag[agno] -= extra_blocks;
+	if (extra_blocks > 0)
+		sb_fdblocks_ag[agno] -= extra_blocks;
 
-		bcnt_btree_curs = bno_btree_curs;
+	bcnt_btree_curs = bno_btree_curs;
 
-		bno_btree_curs.owner = XFS_RMAP_OWN_AG;
-		bcnt_btree_curs.owner = XFS_RMAP_OWN_AG;
-		setup_cursor(mp, agno, &bno_btree_curs);
-		setup_cursor(mp, agno, &bcnt_btree_curs);
+	bno_btree_curs.owner = XFS_RMAP_OWN_AG;
+	bcnt_btree_curs.owner = XFS_RMAP_OWN_AG;
+	setup_cursor(mp, agno, &bno_btree_curs);
+	setup_cursor(mp, agno, &bcnt_btree_curs);
 
 #ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, "# of bno extents is %d\n",
-				count_bno_extents(agno));
-		fprintf(stderr, "# of bcnt extents is %d\n",
-				count_bcnt_extents(agno));
+	fprintf(stderr, "# of bno extents is %d\n",
+			count_bno_extents(agno));
+	fprintf(stderr, "# of bcnt extents is %d\n",
+			count_bcnt_extents(agno));
 #endif
 
-		/*
-		 * now rebuild the freespace trees
-		 */
-		freeblks1 = build_freespace_tree(mp, agno,
-					&bno_btree_curs, XFS_BTNUM_BNO);
+	/*
+	 * now rebuild the freespace trees
+	 */
+	freeblks1 = build_freespace_tree(mp, agno,
+				&bno_btree_curs, XFS_BTNUM_BNO);
 #ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, "# of free blocks == %d\n", freeblks1);
+	fprintf(stderr, "# of free blocks == %d\n", freeblks1);
 #endif
-		write_cursor(&bno_btree_curs);
+	write_cursor(&bno_btree_curs);
 
 #ifdef DEBUG
-		freeblks2 = build_freespace_tree(mp, agno,
-					&bcnt_btree_curs, XFS_BTNUM_CNT);
+	freeblks2 = build_freespace_tree(mp, agno,
+				&bcnt_btree_curs, XFS_BTNUM_CNT);
 #else
-		(void) build_freespace_tree(mp, agno,
-					&bcnt_btree_curs, XFS_BTNUM_CNT);
+	(void) build_freespace_tree(mp, agno,
+				&bcnt_btree_curs, XFS_BTNUM_CNT);
 #endif
-		write_cursor(&bcnt_btree_curs);
+	write_cursor(&bcnt_btree_curs);
 
-		ASSERT(freeblks1 == freeblks2);
+	ASSERT(freeblks1 == freeblks2);
 
-		if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
-			build_rmap_tree(mp, agno, &rmap_btree_curs);
-			write_cursor(&rmap_btree_curs);
-			sb_fdblocks_ag[agno] += (rmap_btree_curs.num_tot_blocks -
-					rmap_btree_curs.num_free_blocks) - 1;
-		}
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+		build_rmap_tree(mp, agno, &rmap_btree_curs);
+		write_cursor(&rmap_btree_curs);
+		sb_fdblocks_ag[agno] += (rmap_btree_curs.num_tot_blocks -
+				rmap_btree_curs.num_free_blocks) - 1;
+	}
 
-		if (xfs_sb_version_hasreflink(&mp->m_sb)) {
-			build_refcount_tree(mp, agno, &refcnt_btree_curs);
-			write_cursor(&refcnt_btree_curs);
-		}
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		build_refcount_tree(mp, agno, &refcnt_btree_curs);
+		write_cursor(&refcnt_btree_curs);
+	}
 
-		/*
-		 * set up agf and agfl
-		 */
-		build_agf_agfl(mp, agno, &bno_btree_curs,
-				&bcnt_btree_curs, freeblks1, extra_blocks,
-				&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
-		/*
-		 * build inode allocation tree.
-		 */
-		build_ino_tree(mp, agno, &ino_btree_curs, XFS_BTNUM_INO,
-				&agi_stat);
-		write_cursor(&ino_btree_curs);
+	/*
+	 * set up agf and agfl
+	 */
+	build_agf_agfl(mp, agno, &bno_btree_curs,
+			&bcnt_btree_curs, freeblks1, extra_blocks,
+			&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
+	/*
+	 * build inode allocation tree.
+	 */
+	build_ino_tree(mp, agno, &ino_btree_curs, XFS_BTNUM_INO,
+			&agi_stat);
+	write_cursor(&ino_btree_curs);
 
-		/*
-		 * build free inode tree
-		 */
-		if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
-			build_ino_tree(mp, agno, &fino_btree_curs,
-					XFS_BTNUM_FINO, NULL);
-			write_cursor(&fino_btree_curs);
-		}
+	/*
+	 * build free inode tree
+	 */
+	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+		build_ino_tree(mp, agno, &fino_btree_curs,
+				XFS_BTNUM_FINO, NULL);
+		write_cursor(&fino_btree_curs);
+	}
 
-		/* build the agi */
-		build_agi(mp, agno, &ino_btree_curs, &fino_btree_curs,
-			  &agi_stat);
+	/* build the agi */
+	build_agi(mp, agno, &ino_btree_curs, &fino_btree_curs,
+		  &agi_stat);
 
-		/*
-		 * tear down cursors
-		 */
-		finish_cursor(&bno_btree_curs);
-		finish_cursor(&ino_btree_curs);
-		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-			finish_cursor(&rmap_btree_curs);
-		if (xfs_sb_version_hasreflink(&mp->m_sb))
-			finish_cursor(&refcnt_btree_curs);
-		if (xfs_sb_version_hasfinobt(&mp->m_sb))
-			finish_cursor(&fino_btree_curs);
-		finish_cursor(&bcnt_btree_curs);
+	/*
+	 * tear down cursors
+	 */
+	finish_cursor(&bno_btree_curs);
+	finish_cursor(&ino_btree_curs);
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		finish_cursor(&rmap_btree_curs);
+	if (xfs_sb_version_hasreflink(&mp->m_sb))
+		finish_cursor(&refcnt_btree_curs);
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		finish_cursor(&fino_btree_curs);
+	finish_cursor(&bcnt_btree_curs);
+
+	/*
+	 * release the incore per-AG bno/bcnt trees so
+	 * the extent nodes can be recycled
+	 */
+	release_agbno_extent_tree(agno);
+	release_agbcnt_extent_tree(agno);
 
-		/*
-		 * release the incore per-AG bno/bcnt trees so
-		 * the extent nodes can be recycled
-		 */
-		release_agbno_extent_tree(agno);
-		release_agbcnt_extent_tree(agno);
-	}
 	PROG_RPT_INC(prog_rpt_done[agno], 1);
 }
 



* [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors
@ 2020-01-01  1:21 ` Darrick J. Wong
From: Darrick J. Wong @ 2020-01-01  1:21 UTC
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Create some new support structures and functions to assist phase5 in
using the btree bulk loader to reconstruct metadata btrees.  This is the
first step in removing the open-coded rebuilding code.
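
The expected lifecycle of one of these rebuild cursors looks like this
(a sketch using the helpers introduced below; nr_blocks is illustrative
and the actual record loading arrives in the next patch):

	struct bt_rebuild	btr;

	/* Pick slack values and set up the block accounting context. */
	init_rebuild(sc, &XFS_RMAP_OINFO_AG, free_space, &btr);

	/* Carve nr_blocks out of the incore free space extent trees. */
	setup_rebuild(sc->mp, agno, &btr, nr_blocks);

	/* The bulk loader pulls those blocks through this callback. */
	btr.bload.alloc_block = rebuild_alloc_block;

	/* After loading, hand back whatever blocks we didn't use. */
	finish_rebuild(sc->mp, &btr);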

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |  231 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 210 insertions(+), 21 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index 4108e22b..ec236d4c 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -18,6 +18,7 @@
 #include "progress.h"
 #include "slab.h"
 #include "rmap.h"
+#include "bload.h"
 
 /*
  * we maintain the current slice (path from root to leaf)
@@ -65,6 +66,14 @@ typedef struct bt_status  {
 	uint64_t		owner;		/* owner */
 } bt_status_t;
 
+struct bt_rebuild {
+	struct xrep_newbt	newbt;
+	struct xfs_btree_bload	bload;
+	union {
+		struct xfs_slab_cursor	*slab_cursor;
+	};
+};
+
 /*
  * extra metadata for the agi
  */
@@ -306,6 +315,157 @@ _("error - not enough free space in filesystem\n"));
 #endif
 }
 
+/*
+ * Estimate proper slack values for a btree that's being reloaded.
+ *
+ * Under most circumstances, we'll take whatever default loading value the
+ * btree bulk loading code calculates for us.  However, there are some
+ * exceptions to this rule:
+ *
+ * (1) If someone turned one of the debug knobs.
+ * (2) The AG has less than ~9% space free.
+ *
+ * Note that we actually use 3/32 for the comparison to avoid division.
+ */
+static void
+estimate_ag_bload_slack(
+	struct repair_ctx	*sc,
+	struct xfs_btree_bload	*bload,
+	unsigned int		free)
+{
+	/*
+	 * The global values are set to -1 (i.e. take the bload defaults)
+	 * unless someone has set them otherwise, so we just pull the values
+	 * here.
+	 */
+	bload->leaf_slack = bload_leaf_slack;
+	bload->node_slack = bload_node_slack;
+
+	/* No further changes if there's more than 3/32 of the space left. */
+	if (free >= ((sc->mp->m_sb.sb_agblocks * 3) >> 5))
+		return;
+
+	/* We're low on space; load the btrees as tightly as possible. */
+	if (bload->leaf_slack < 0)
+		bload->leaf_slack = 0;
+	if (bload->node_slack < 0)
+		bload->node_slack = 0;
+}
+
+/* Initialize a btree rebuild context. */
+static void
+init_rebuild(
+	struct repair_ctx		*sc,
+	const struct xfs_owner_info	*oinfo,
+	xfs_agblock_t			free_space,
+	struct bt_rebuild		*btr)
+{
+	memset(btr, 0, sizeof(struct bt_rebuild));
+
+	xrep_newbt_init_bare(&btr->newbt, sc);
+	btr->newbt.oinfo = *oinfo; /* struct copy */
+	estimate_ag_bload_slack(sc, &btr->bload, free_space);
+}
+
+/* Reserve blocks for the new btree. */
+static void
+setup_rebuild(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr,
+	uint32_t		nr_blocks)
+{
+	struct extent_tree_node	*ext_ptr;
+	struct extent_tree_node	*bno_ext_ptr;
+	uint32_t		blocks_allocated = 0;
+	int			error;
+
+	/*
+	 * grab the smallest extent and use it up, then get the
+	 * next smallest.  This mimics the init_*_cursor code.
+	 */
+	ext_ptr = findfirst_bcnt_extent(agno);
+
+	/*
+	 * set up the free block array
+	 */
+	while (blocks_allocated < nr_blocks)  {
+		uint64_t	len;
+		xfs_agblock_t	new_start;
+		xfs_extlen_t	new_len;
+
+		if (!ext_ptr)
+			do_error(
+_("error - not enough free space in filesystem\n"));
+
+		/* Use up the extent we've got. */
+		len = min(ext_ptr->ex_blockcount,
+				btr->bload.nr_blocks - blocks_allocated);
+		error = xrep_newbt_add_reservation(&btr->newbt,
+				XFS_AGB_TO_FSB(mp, agno,
+					       ext_ptr->ex_startblock),
+				len, NULL);
+		if (error)
+			do_error(_("could not set up btree reservation: %s\n"),
+				strerror(-error));
+		blocks_allocated += len;
+
+		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
+				btr->newbt.oinfo.oi_owner);
+		if (error)
+			do_error(_("could not set up btree rmaps: %s\n"),
+				strerror(-error));
+
+		/* Figure out if we're putting anything back. */
+		new_start = ext_ptr->ex_startblock + len;
+		new_len = ext_ptr->ex_blockcount - len;
+
+		/* Delete the used-up extent from both extent trees. */
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, "releasing extent: %u [%u %u]\n",
+			agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
+#endif
+		bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
+		ASSERT(bno_ext_ptr != NULL);
+		get_bno_extent(agno, bno_ext_ptr);
+		release_extent_tree_node(bno_ext_ptr);
+
+		ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
+				ext_ptr->ex_blockcount);
+		ASSERT(ext_ptr != NULL);
+		release_extent_tree_node(ext_ptr);
+
+		/*
+		 * If we only used part of this last extent, then we need only
+		 * to reinsert the extent in the extent trees and we're done.
+		 */
+		if (new_len > 0) {
+			add_bno_extent(agno, new_start, new_len);
+			add_bcnt_extent(agno, new_start, new_len);
+			break;
+		}
+
+		/* Otherwise, find the next biggest extent. */
+		ext_ptr = findfirst_bcnt_extent(agno);
+	}
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "blocks_allocated = %d\n",
+		blocks_allocated);
+#endif
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+static int
+rebuild_alloc_block(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	void			*priv)
+{
+	struct bt_rebuild	*btr = priv;
+
+	return xrep_newbt_alloc_block(cur, &btr->newbt, ptr);
+}
+
 static void
 write_cursor(bt_status_t *curs)
 {
@@ -334,6 +494,34 @@ finish_cursor(bt_status_t *curs)
 	free(curs->btree_blocks);
 }
 
+static void
+finish_rebuild(
+	struct xfs_mount	*mp,
+	struct bt_rebuild	*btr)
+{
+	struct xrep_newbt_resv	*resv, *n;
+
+	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
+		xfs_agnumber_t	agno;
+		xfs_agblock_t	bno;
+		xfs_extlen_t	len;
+
+		if (resv->used >= resv->len)
+			continue;
+
+		/* XXX: Shouldn't this go on the AGFL? */
+		/* Put back everything we didn't use. */
+		bno = XFS_FSB_TO_AGBNO(mp, resv->fsbno + resv->used);
+		agno = XFS_FSB_TO_AGNO(mp, resv->fsbno + resv->used);
+		len = resv->len - resv->used;
+
+		add_bno_extent(agno, bno, len);
+		add_bcnt_extent(agno, bno, len);
+	}
+
+	xrep_newbt_destroy(&btr->newbt, 0);
+}
+
 /*
  * We need to leave some free records in the tree for the corner case of
  * setting up the AGFL. This may require allocation of blocks, and as
@@ -2211,28 +2399,29 @@ keep_fsinos(xfs_mount_t *mp)
 
 static void
 phase5_func(
-	xfs_mount_t	*mp,
-	xfs_agnumber_t	agno,
-	struct xfs_slab	*lost_fsb)
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct xfs_slab		*lost_fsb)
 {
-	uint64_t	num_inos;
-	uint64_t	num_free_inos;
-	uint64_t	finobt_num_inos;
-	uint64_t	finobt_num_free_inos;
-	bt_status_t	bno_btree_curs;
-	bt_status_t	bcnt_btree_curs;
-	bt_status_t	ino_btree_curs;
-	bt_status_t	fino_btree_curs;
-	bt_status_t	rmap_btree_curs;
-	bt_status_t	refcnt_btree_curs;
-	int		extra_blocks = 0;
-	uint		num_freeblocks;
-	xfs_extlen_t	freeblks1;
+	struct repair_ctx	sc = { .mp = mp, };
+	struct agi_stat		agi_stat = {0,};
+	uint64_t		num_inos;
+	uint64_t		num_free_inos;
+	uint64_t		finobt_num_inos;
+	uint64_t		finobt_num_free_inos;
+	bt_status_t		bno_btree_curs;
+	bt_status_t		bcnt_btree_curs;
+	bt_status_t		ino_btree_curs;
+	bt_status_t		fino_btree_curs;
+	bt_status_t		rmap_btree_curs;
+	bt_status_t		refcnt_btree_curs;
+	int			extra_blocks = 0;
+	uint			num_freeblocks;
+	xfs_extlen_t		freeblks1;
 #ifdef DEBUG
-	xfs_extlen_t	freeblks2;
+	xfs_extlen_t		freeblks2;
 #endif
-	xfs_agblock_t	num_extents;
-	struct agi_stat	agi_stat = {0,};
+	xfs_agblock_t		num_extents;
 
 	if (verbose)
 		do_log(_("        - agno = %d\n"), agno);
@@ -2454,8 +2643,8 @@ inject_lost_blocks(
 		if (error)
 			goto out_cancel;
 
-		error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG,
-					    XFS_AG_RESV_NONE);
+		error = -libxfs_free_extent(tp, *fsb, 1,
+				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
 		if (error)
 			goto out_cancel;
 



* [PATCH 4/9] xfs_repair: rebuild free space btrees with bulk loader
@ 2020-01-01  1:21 ` Darrick J. Wong
From: Darrick J. Wong @ 2020-01-01  1:21 UTC
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use the btree bulk loading functions to rebuild the free space btrees
and drop the open-coded implementation.
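
Condensed, the new bnobt builder wires the incore extent list into the
bulk loader through two callbacks and writes the tree in one pass (a
sketch; the transaction setup, error handling, and the matching cntbt
code are elided):

	btr_bno->bload.get_data = get_bnobt_data;
	btr_bno->bload.alloc_block = rebuild_alloc_block;
	btr_bno->bno_rec = findfirst_bno_extent(agno);

	cur = libxfs_allocbt_stage_cursor(sc->mp, sc->tp,
			&btr_bno->newbt.afake, agno, XFS_BTNUM_BNO);
	error = -libxfs_btree_bload(cur, &btr_bno->bload, btr_bno);
	libxfs_btree_del_cursor(cur, 0);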

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/libxfs_api_defs.h |    3 
 repair/phase5.c          |  859 ++++++++++++++--------------------------------
 2 files changed, 260 insertions(+), 602 deletions(-)


diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 6bab5a70..60dc9297 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -178,5 +178,8 @@
 #define xfs_ialloc_calc_rootino		libxfs_ialloc_calc_rootino
 
 #define xfs_sb_read_secondary		libxfs_sb_read_secondary
+#define xfs_btree_bload_compute_geometry libxfs_btree_bload_compute_geometry
+#define xfs_btree_bload			libxfs_btree_bload
+#define xfs_allocbt_stage_cursor	libxfs_allocbt_stage_cursor
 
 #endif /* __LIBXFS_API_DEFS_H__ */
diff --git a/repair/phase5.c b/repair/phase5.c
index ec236d4c..2421c4bc 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -71,6 +71,10 @@ struct bt_rebuild {
 	struct xfs_btree_bload	bload;
 	union {
 		struct xfs_slab_cursor	*slab_cursor;
+		struct {
+			struct extent_tree_node	*bno_rec;
+			xfs_agblock_t		*freeblks;
+		};
 	};
 };
 
@@ -88,7 +92,10 @@ static uint64_t	*sb_ifree_ag;		/* free inodes per ag */
 static uint64_t	*sb_fdblocks_ag;	/* free data blocks per ag */
 
 static int
-mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
+mk_incore_fstree(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	unsigned int		*num_freeblocks)
 {
 	int			in_extent;
 	int			num_extents;
@@ -100,6 +107,8 @@ mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
 	xfs_extlen_t		blen;
 	int			bstate;
 
+	*num_freeblocks = 0;
+
 	/*
 	 * scan the bitmap for the ag looking for continuous
 	 * extents of free blocks.  At this point, we know
@@ -155,6 +164,7 @@ mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
 #endif
 				add_bno_extent(agno, extent_start, extent_len);
 				add_bcnt_extent(agno, extent_start, extent_len);
+				*num_freeblocks += extent_len;
 			}
 		}
 	}
@@ -168,6 +178,7 @@ mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
 #endif
 		add_bno_extent(agno, extent_start, extent_len);
 		add_bcnt_extent(agno, extent_start, extent_len);
+		*num_freeblocks += extent_len;
 	}
 
 	return(num_extents);
@@ -494,313 +505,32 @@ finish_cursor(bt_status_t *curs)
 	free(curs->btree_blocks);
 }
 
+/*
+ * Scoop up leftovers from a rebuild cursor for later freeing, then free the
+ * rebuild context.
+ */
 static void
 finish_rebuild(
 	struct xfs_mount	*mp,
-	struct bt_rebuild	*btr)
+	struct bt_rebuild	*btr,
+	struct xfs_slab		*lost_fsb)
 {
 	struct xrep_newbt_resv	*resv, *n;
 
 	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
-		xfs_agnumber_t	agno;
-		xfs_agblock_t	bno;
-		xfs_extlen_t	len;
-
-		if (resv->used >= resv->len)
-			continue;
-
-		/* XXX: Shouldn't this go on the AGFL? */
-		/* Put back everything we didn't use. */
-		bno = XFS_FSB_TO_AGBNO(mp, resv->fsbno + resv->used);
-		agno = XFS_FSB_TO_AGNO(mp, resv->fsbno + resv->used);
-		len = resv->len - resv->used;
-
-		add_bno_extent(agno, bno, len);
-		add_bcnt_extent(agno, bno, len);
-	}
-
-	xrep_newbt_destroy(&btr->newbt, 0);
-}
-
-/*
- * We need to leave some free records in the tree for the corner case of
- * setting up the AGFL. This may require allocation of blocks, and as
- * such can require insertion of new records into the tree (e.g. moving
- * a record in the by-count tree when a long extent is shortened). If we
- * pack the records into the leaves with no slack space, this requires a
- * leaf split to occur and a block to be allocated from the free list.
- * If we don't have any blocks on the free list (because we are setting
- * it up!), then we fail, and the filesystem will fail with the same
- * failure at runtime. Hence leave a couple of records slack space in
- * each block to allow immediate modification of the tree without
- * requiring splits to be done.
- *
- * XXX(hch): any reason we don't just look at mp->m_alloc_mxr?
- */
-#define XR_ALLOC_BLOCK_MAXRECS(mp, level) \
-	(libxfs_allocbt_maxrecs((mp), (mp)->m_sb.sb_blocksize, (level) == 0) - 2)
-
-/*
- * this calculates a freespace cursor for an ag.
- * btree_curs is an in/out.  returns the number of
- * blocks that will show up in the AGFL.
- */
-static int
-calculate_freespace_cursor(xfs_mount_t *mp, xfs_agnumber_t agno,
-			xfs_agblock_t *extents, bt_status_t *btree_curs)
-{
-	xfs_extlen_t		blocks_needed;		/* a running count */
-	xfs_extlen_t		blocks_allocated_pt;	/* per tree */
-	xfs_extlen_t		blocks_allocated_total;	/* for both trees */
-	xfs_agblock_t		num_extents;
-	int			i;
-	int			extents_used;
-	int			extra_blocks;
-	bt_stat_level_t		*lptr;
-	bt_stat_level_t		*p_lptr;
-	extent_tree_node_t	*ext_ptr;
-	int			level;
-
-	num_extents = *extents;
-	extents_used = 0;
-
-	ASSERT(num_extents != 0);
-
-	lptr = &btree_curs->level[0];
-	btree_curs->init = 1;
-
-	/*
-	 * figure out how much space we need for the leaf level
-	 * of the tree and set up the cursor for the leaf level
-	 * (note that the same code is duplicated further down)
-	 */
-	lptr->num_blocks = howmany(num_extents, XR_ALLOC_BLOCK_MAXRECS(mp, 0));
-	lptr->num_recs_pb = num_extents / lptr->num_blocks;
-	lptr->modulo = num_extents % lptr->num_blocks;
-	lptr->num_recs_tot = num_extents;
-	level = 1;
-
-#ifdef XR_BLD_FREE_TRACE
-	fprintf(stderr, "%s 0 %d %d %d %d\n", __func__,
-			lptr->num_blocks,
-			lptr->num_recs_pb,
-			lptr->modulo,
-			lptr->num_recs_tot);
-#endif
-	/*
-	 * if we need more levels, set them up.  # of records
-	 * per level is the # of blocks in the level below it
-	 */
-	if (lptr->num_blocks > 1)  {
-		for (; btree_curs->level[level - 1].num_blocks > 1
-				&& level < XFS_BTREE_MAXLEVELS;
-				level++)  {
-			lptr = &btree_curs->level[level];
-			p_lptr = &btree_curs->level[level - 1];
-			lptr->num_blocks = howmany(p_lptr->num_blocks,
-					XR_ALLOC_BLOCK_MAXRECS(mp, level));
-			lptr->modulo = p_lptr->num_blocks
-					% lptr->num_blocks;
-			lptr->num_recs_pb = p_lptr->num_blocks
-					/ lptr->num_blocks;
-			lptr->num_recs_tot = p_lptr->num_blocks;
-#ifdef XR_BLD_FREE_TRACE
-			fprintf(stderr, "%s %d %d %d %d %d\n", __func__,
-					level,
-					lptr->num_blocks,
-					lptr->num_recs_pb,
-					lptr->modulo,
-					lptr->num_recs_tot);
-#endif
-		}
-	}
+		while (resv->used < resv->len) {
+			xfs_fsblock_t	fsb = resv->fsbno + resv->used;
+			int		error;
 
-	ASSERT(lptr->num_blocks == 1);
-	btree_curs->num_levels = level;
-
-	/*
-	 * ok, now we have a hypothetical cursor that
-	 * will work for both the bno and bcnt trees.
-	 * now figure out if using up blocks to set up the
-	 * trees will perturb the shape of the freespace tree.
-	 * if so, we've over-allocated.  the freespace trees
-	 * as they will be *after* accounting for the free space
-	 * we've used up will need fewer blocks to to represent
-	 * than we've allocated.  We can use the AGFL to hold
-	 * xfs_agfl_size (sector/xfs_agfl_t) blocks but that's it.
-	 * Thus we limit things to xfs_agfl_size/2 for each of the 2 btrees.
-	 * if the number of extra blocks is more than that,
-	 * we'll have to be called again.
-	 */
-	for (blocks_needed = 0, i = 0; i < level; i++)  {
-		blocks_needed += btree_curs->level[i].num_blocks;
-	}
-
-	/*
-	 * record the # of blocks we've allocated
-	 */
-	blocks_allocated_pt = blocks_needed;
-	blocks_needed *= 2;
-	blocks_allocated_total = blocks_needed;
-
-	/*
-	 * figure out how many free extents will be used up by
-	 * our space allocation
-	 */
-	if ((ext_ptr = findfirst_bcnt_extent(agno)) == NULL)
-		do_error(_("can't rebuild fs trees -- not enough free space "
-			   "on ag %u\n"), agno);
-
-	while (ext_ptr != NULL && blocks_needed > 0)  {
-		if (ext_ptr->ex_blockcount <= blocks_needed)  {
-			blocks_needed -= ext_ptr->ex_blockcount;
-			extents_used++;
-		} else  {
-			blocks_needed = 0;
-		}
-
-		ext_ptr = findnext_bcnt_extent(agno, ext_ptr);
-
-#ifdef XR_BLD_FREE_TRACE
-		if (ext_ptr != NULL)  {
-			fprintf(stderr, "got next extent [%u %u]\n",
-				ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
-		} else  {
-			fprintf(stderr, "out of extents\n");
-		}
-#endif
-	}
-	if (blocks_needed > 0)
-		do_error(_("ag %u - not enough free space to build freespace "
-			   "btrees\n"), agno);
-
-	ASSERT(num_extents >= extents_used);
-
-	num_extents -= extents_used;
-
-	/*
-	 * see if the number of leaf blocks will change as a result
-	 * of the number of extents changing
-	 */
-	if (howmany(num_extents, XR_ALLOC_BLOCK_MAXRECS(mp, 0))
-			!= btree_curs->level[0].num_blocks)  {
-		/*
-		 * yes -- recalculate the cursor.  If the number of
-		 * excess (overallocated) blocks is < xfs_agfl_size/2, we're ok.
-		 * we can put those into the AGFL.  we don't try
-		 * and get things to converge exactly (reach a
-		 * state with zero excess blocks) because there
-		 * exist pathological cases which will never
-		 * converge.  first, check for the zero-case.
-		 */
-		if (num_extents == 0)  {
-			/*
-			 * ok, we've used up all the free blocks
-			 * trying to lay out the leaf level. go
-			 * to a one block (empty) btree and put the
-			 * already allocated blocks into the AGFL
-			 */
-			if (btree_curs->level[0].num_blocks != 1)  {
-				/*
-				 * we really needed more blocks because
-				 * the old tree had more than one level.
-				 * this is bad.
-				 */
-				 do_warn(_("not enough free blocks left to "
-					   "describe all free blocks in AG "
-					   "%u\n"), agno);
-			}
-#ifdef XR_BLD_FREE_TRACE
-			fprintf(stderr,
-				"ag %u -- no free extents, alloc'ed %d\n",
-				agno, blocks_allocated_pt);
-#endif
-			lptr->num_blocks = 1;
-			lptr->modulo = 0;
-			lptr->num_recs_pb = 0;
-			lptr->num_recs_tot = 0;
-
-			btree_curs->num_levels = 1;
-
-			/*
-			 * don't reset the allocation stats, assume
-			 * they're all extra blocks
-			 * don't forget to return the total block count
-			 * not the per-tree block count.  these are the
-			 * extras that will go into the AGFL.  subtract
-			 * two for the root blocks.
-			 */
-			btree_curs->num_tot_blocks = blocks_allocated_pt;
-			btree_curs->num_free_blocks = blocks_allocated_pt;
-
-			*extents = 0;
-
-			return(blocks_allocated_total - 2);
-		}
-
-		lptr = &btree_curs->level[0];
-		lptr->num_blocks = howmany(num_extents,
-					XR_ALLOC_BLOCK_MAXRECS(mp, 0));
-		lptr->num_recs_pb = num_extents / lptr->num_blocks;
-		lptr->modulo = num_extents % lptr->num_blocks;
-		lptr->num_recs_tot = num_extents;
-		level = 1;
-
-		/*
-		 * if we need more levels, set them up
-		 */
-		if (lptr->num_blocks > 1)  {
-			for (level = 1; btree_curs->level[level-1].num_blocks
-					> 1 && level < XFS_BTREE_MAXLEVELS;
-					level++)  {
-				lptr = &btree_curs->level[level];
-				p_lptr = &btree_curs->level[level-1];
-				lptr->num_blocks = howmany(p_lptr->num_blocks,
-					XR_ALLOC_BLOCK_MAXRECS(mp, level));
-				lptr->modulo = p_lptr->num_blocks
-						% lptr->num_blocks;
-				lptr->num_recs_pb = p_lptr->num_blocks
-						/ lptr->num_blocks;
-				lptr->num_recs_tot = p_lptr->num_blocks;
-			}
-		}
-		ASSERT(lptr->num_blocks == 1);
-		btree_curs->num_levels = level;
-
-		/*
-		 * now figure out the number of excess blocks
-		 */
-		for (blocks_needed = 0, i = 0; i < level; i++)  {
-			blocks_needed += btree_curs->level[i].num_blocks;
-		}
-		blocks_needed *= 2;
-
-		ASSERT(blocks_allocated_total >= blocks_needed);
-		extra_blocks = blocks_allocated_total - blocks_needed;
-	} else  {
-		if (extents_used > 0) {
-			/*
-			 * reset the leaf level geometry to account
-			 * for consumed extents.  we can leave the
-			 * rest of the cursor alone since the number
-			 * of leaf blocks hasn't changed.
-			 */
-			lptr = &btree_curs->level[0];
-
-			lptr->num_recs_pb = num_extents / lptr->num_blocks;
-			lptr->modulo = num_extents % lptr->num_blocks;
-			lptr->num_recs_tot = num_extents;
+			error = slab_add(lost_fsb, &fsb);
+			if (error)
+				do_error(
+_("Insufficient memory saving lost blocks.\n"));
+			resv->used++;
 		}
-
-		extra_blocks = 0;
 	}
 
-	btree_curs->num_tot_blocks = blocks_allocated_pt;
-	btree_curs->num_free_blocks = blocks_allocated_pt;
-
-	*extents = num_extents;
-
-	return(extra_blocks);
+	xrep_newbt_destroy(&btr->newbt, 0);
 }
 
 /* Map btnum to buffer ops for the types that need it. */
@@ -827,251 +557,211 @@ btnum_to_ops(
 	}
 }
 
+/*
+ * Free Space Btrees
+ *
+ * We need to leave some free records in the tree for the corner case of
+ * setting up the AGFL. This may require allocation of blocks, and as
+ * such can require insertion of new records into the tree (e.g. moving
+ * a record in the by-count tree when a long extent is shortened). If we
+ * pack the records into the leaves with no slack space, this requires a
+ * leaf split to occur and a block to be allocated from the free list.
+ * If we don't have any blocks on the free list (because we are setting
+ * it up!), then we fail, and the filesystem will fail with the same
+ * failure at runtime. Hence leave a couple of records slack space in
+ * each block to allow immediate modification of the tree without
+ * requiring splits to be done.
+ */
+
 static void
-prop_freespace_cursor(xfs_mount_t *mp, xfs_agnumber_t agno,
-		bt_status_t *btree_curs, xfs_agblock_t startblock,
-		xfs_extlen_t blockcount, int level, xfs_btnum_t btnum)
+init_freespace_cursors(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	unsigned int		free_space,
+	unsigned int		*nr_extents,
+	int			*extra_blocks,
+	struct bt_rebuild	*btr_bno,
+	struct bt_rebuild	*btr_cnt)
 {
-	struct xfs_btree_block	*bt_hdr;
-	xfs_alloc_key_t		*bt_key;
-	xfs_alloc_ptr_t		*bt_ptr;
-	xfs_agblock_t		agbno;
-	bt_stat_level_t		*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
-
-	ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
-
-	level++;
-
-	if (level >= btree_curs->num_levels)
-		return;
-
-	lptr = &btree_curs->level[level];
-	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
-		/*
-		 * only happens once when initializing the
-		 * left-hand side of the tree.
-		 */
-		prop_freespace_cursor(mp, agno, btree_curs, startblock,
-				blockcount, level, btnum);
-	}
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
-				lptr->num_recs_pb + (lptr->modulo > 0))  {
-		/*
-		 * write out current prev block, grab us a new block,
-		 * and set the rightsib pointer of current block
-		 */
-#ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, " %d ", lptr->prev_agbno);
-#endif
-		if (lptr->prev_agbno != NULLAGBLOCK) {
-			ASSERT(lptr->prev_buf_p != NULL);
-			libxfs_writebuf(lptr->prev_buf_p, 0);
-		}
-		lptr->prev_agbno = lptr->agbno;;
-		lptr->prev_buf_p = lptr->buf_p;
-		agbno = get_next_blockaddr(agno, level, btree_curs);
+	struct xfs_btree_cur	*cur;
+	unsigned int		bno_blocks;
+	unsigned int		cnt_blocks;
+	int			error;
 
-		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
+	init_rebuild(sc, &XFS_RMAP_OINFO_AG, free_space, btr_bno);
+	init_rebuild(sc, &XFS_RMAP_OINFO_AG, free_space, btr_cnt);
 
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		lptr->agbno = agbno;
+	/*
+	 * Now we need to allocate blocks for the free space btrees using the
+	 * free space records we're about to put in them.  Every record we use
+	 * can change the shape of the free space trees, so we recompute the
+	 * btree shape until we stop needing /more/ blocks.  If we have any
+	 * left over we'll stash them in the AGFL when we're done.
+	 */
+	do {
+		unsigned int	num_freeblocks;
 
-		if (lptr->modulo)
-			lptr->modulo--;
+		bno_blocks = btr_bno->bload.nr_blocks;
+		cnt_blocks = btr_cnt->bload.nr_blocks;
 
-		/*
-		 * initialize block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum, level,
-					0, agno);
+		/* Compute how many bnobt blocks we'll need. */
+		cur = libxfs_allocbt_stage_cursor(sc->mp, sc->tp,
+				&btr_bno->newbt.afake, agno, XFS_BTNUM_BNO);
+		error = -libxfs_btree_bload_compute_geometry(cur,
+				&btr_bno->bload, *nr_extents);
+		if (error)
+			do_error(
+_("Unable to compute free space by block btree geometry, error %d.\n"), -error);
+		libxfs_btree_del_cursor(cur, error);
+
+		/* Compute how many cntbt blocks we'll need. */
+		cur = libxfs_allocbt_stage_cursor(sc->mp, sc->tp,
+				&btr_cnt->newbt.afake, agno, XFS_BTNUM_CNT);
+		error = -libxfs_btree_bload_compute_geometry(cur,
+				&btr_cnt->bload, *nr_extents);
+		if (error)
+			do_error(
+_("Unable to compute free space by length btree geometry, error %d.\n"), -error);
+		libxfs_btree_del_cursor(cur, error);
 
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
+		/* We don't need any more blocks, so we're done. */
+		if (bno_blocks >= btr_bno->bload.nr_blocks &&
+		    cnt_blocks >= btr_cnt->bload.nr_blocks)
+			break;
 
-		/*
-		 * propagate extent record for first extent in new block up
-		 */
-		prop_freespace_cursor(mp, agno, btree_curs, startblock,
-				blockcount, level, btnum);
-	}
-	/*
-	 * add extent info to current block
-	 */
-	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
+		/* Allocate however many more blocks we need this time. */
+		if (bno_blocks < btr_bno->bload.nr_blocks)
+			setup_rebuild(sc->mp, agno, btr_bno,
+					btr_bno->bload.nr_blocks - bno_blocks);
+		if (cnt_blocks < btr_cnt->bload.nr_blocks)
+			setup_rebuild(sc->mp, agno, btr_cnt,
+					btr_cnt->bload.nr_blocks - cnt_blocks);
 
-	bt_key = XFS_ALLOC_KEY_ADDR(mp, bt_hdr,
-				be16_to_cpu(bt_hdr->bb_numrecs));
-	bt_ptr = XFS_ALLOC_PTR_ADDR(mp, bt_hdr,
-				be16_to_cpu(bt_hdr->bb_numrecs),
-				mp->m_alloc_mxr[1]);
+		/* Ok, now how many free space records do we have? */
+		*nr_extents = count_bno_extents_blocks(agno, &num_freeblocks);
+	} while (1);
 
-	bt_key->ar_startblock = cpu_to_be32(startblock);
-	bt_key->ar_blockcount = cpu_to_be32(blockcount);
-	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
+	*extra_blocks = (bno_blocks - btr_bno->bload.nr_blocks) +
+			(cnt_blocks - btr_cnt->bload.nr_blocks);
 }
 
-/*
- * rebuilds a freespace tree given a cursor and type
- * of tree to build (bno or bcnt).  returns the number of free blocks
- * represented by the tree.
- */
-static xfs_extlen_t
-build_freespace_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
-		bt_status_t *btree_curs, xfs_btnum_t btnum)
+static void
+get_freesp_data(
+	struct xfs_btree_cur		*cur,
+	struct extent_tree_node		*bno_rec,
+	xfs_agblock_t			*freeblks)
 {
-	xfs_agnumber_t		i;
-	xfs_agblock_t		j;
-	struct xfs_btree_block	*bt_hdr;
-	xfs_alloc_rec_t		*bt_rec;
-	int			level;
-	xfs_agblock_t		agbno;
-	extent_tree_node_t	*ext_ptr;
-	bt_stat_level_t		*lptr;
-	xfs_extlen_t		freeblks;
-	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
-
-	ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
+	struct xfs_alloc_rec_incore	*arec = &cur->bc_rec.a;
 
-#ifdef XR_BLD_FREE_TRACE
-	fprintf(stderr, "in build_freespace_tree, agno = %d\n", agno);
-#endif
-	level = btree_curs->num_levels;
-	freeblks = 0;
+	arec->ar_startblock = bno_rec->ex_startblock;
+	arec->ar_blockcount = bno_rec->ex_blockcount;
+	if (freeblks)
+		*freeblks += bno_rec->ex_blockcount;
+}
 
-	ASSERT(level > 0);
+/* Grab one bnobt record. */
+static int
+get_bnobt_data(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
+{
+	struct bt_rebuild		*btr = priv;
 
-	/*
-	 * initialize the first block on each btree level
-	 */
-	for (i = 0; i < level; i++)  {
-		lptr = &btree_curs->level[i];
+	get_freesp_data(cur, btr->bno_rec, btr->freeblks);
+	btr->bno_rec = findnext_bno_extent(btr->bno_rec);
+	return 0;
+}
 
-		agbno = get_next_blockaddr(agno, i, btree_curs);
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
+/* Rebuild a free space by block number btree. */
+static void
+build_bnobt(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr_bno,
+	xfs_agblock_t		*freeblks)
+{
+	struct xfs_btree_cur	*cur;
+	int			error;
 
-		if (i == btree_curs->num_levels - 1)
-			btree_curs->root = agbno;
+	*freeblks = 0;
+	btr_bno->bload.get_data = get_bnobt_data;
+	btr_bno->bload.alloc_block = rebuild_alloc_block;
+	btr_bno->bno_rec = findfirst_bno_extent(agno);
+	btr_bno->freeblks = freeblks;
 
-		lptr->agbno = agbno;
-		lptr->prev_agbno = NULLAGBLOCK;
-		lptr->prev_buf_p = NULL;
-		/*
-		 * initialize block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum, i, 0, agno);
-	}
-	/*
-	 * run along leaf, setting up records.  as we have to switch
-	 * blocks, call the prop_freespace_cursor routine to set up the new
-	 * pointers for the parent.  that can recurse up to the root
-	 * if required.  set the sibling pointers for leaf level here.
-	 */
-	if (btnum == XFS_BTNUM_BNO)
-		ext_ptr = findfirst_bno_extent(agno);
-	else
-		ext_ptr = findfirst_bcnt_extent(agno);
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
+	if (error)
+		do_error(
+_("Insufficient memory to construct bnobt rebuild transaction.\n"));
 
-#ifdef XR_BLD_FREE_TRACE
-	fprintf(stderr, "bft, agno = %d, start = %u, count = %u\n",
-		agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
-#endif
+	/* Add all observed bnobt records. */
+	cur = libxfs_allocbt_stage_cursor(sc->mp, sc->tp,
+			&btr_bno->newbt.afake, agno, XFS_BTNUM_BNO);
+	error = -libxfs_btree_bload(cur, &btr_bno->bload, btr_bno);
+	if (error)
+		do_error(
+_("Error %d while creating bnobt btree for AG %u.\n"), error, agno);
 
-	lptr = &btree_curs->level[0];
+	/* Since we're not writing the AGF yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(cur, 0);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing bnobt btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
+}
 
-	for (i = 0; i < btree_curs->level[0].num_blocks; i++)  {
-		/*
-		 * block initialization, lay in block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum, 0, 0, agno);
+/* Grab one cntbt record. */
+static int
+get_cntbt_data(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
+{
+	struct bt_rebuild		*btr = priv;
 
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-		bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
-							(lptr->modulo > 0));
-#ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, "bft, bb_numrecs = %d\n",
-				be16_to_cpu(bt_hdr->bb_numrecs));
-#endif
+	get_freesp_data(cur, btr->bno_rec, btr->freeblks);
+	btr->bno_rec = findnext_bcnt_extent(cur->bc_private.a.agno,
+			btr->bno_rec);
+	return 0;
+}
 
-		if (lptr->modulo > 0)
-			lptr->modulo--;
+/* Rebuild a free space by block count btree. */
+static void
+build_cntbt(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr_cnt,
+	xfs_agblock_t		*freeblks)
+{
+	struct xfs_btree_cur	*cur;
+	int			error;
 
-		/*
-		 * initialize values in the path up to the root if
-		 * this is a multi-level btree
-		 */
-		if (btree_curs->num_levels > 1)
-			prop_freespace_cursor(mp, agno, btree_curs,
-					ext_ptr->ex_startblock,
-					ext_ptr->ex_blockcount,
-					0, btnum);
-
-		bt_rec = (xfs_alloc_rec_t *)
-			  ((char *)bt_hdr + XFS_ALLOC_BLOCK_LEN(mp));
-		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
-			ASSERT(ext_ptr != NULL);
-			bt_rec[j].ar_startblock = cpu_to_be32(
-							ext_ptr->ex_startblock);
-			bt_rec[j].ar_blockcount = cpu_to_be32(
-							ext_ptr->ex_blockcount);
-			freeblks += ext_ptr->ex_blockcount;
-			if (btnum == XFS_BTNUM_BNO)
-				ext_ptr = findnext_bno_extent(ext_ptr);
-			else
-				ext_ptr = findnext_bcnt_extent(agno, ext_ptr);
-#if 0
-#ifdef XR_BLD_FREE_TRACE
-			if (ext_ptr == NULL)
-				fprintf(stderr, "null extent pointer, j = %d\n",
-					j);
-			else
-				fprintf(stderr,
-				"bft, agno = %d, start = %u, count = %u\n",
-					agno, ext_ptr->ex_startblock,
-					ext_ptr->ex_blockcount);
-#endif
-#endif
-		}
+	*freeblks = 0;
+	btr_cnt->bload.get_data = get_cntbt_data;
+	btr_cnt->bload.alloc_block = rebuild_alloc_block;
+	btr_cnt->bno_rec = findfirst_bcnt_extent(agno);
+	btr_cnt->freeblks = freeblks;
 
-		if (ext_ptr != NULL)  {
-			/*
-			 * get next leaf level block
-			 */
-			if (lptr->prev_buf_p != NULL)  {
-#ifdef XR_BLD_FREE_TRACE
-				fprintf(stderr, " writing fst agbno %u\n",
-					lptr->prev_agbno);
-#endif
-				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
-				libxfs_writebuf(lptr->prev_buf_p, 0);
-			}
-			lptr->prev_buf_p = lptr->buf_p;
-			lptr->prev_agbno = lptr->agbno;
-			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
-			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
+	if (error)
+		do_error(
+_("Insufficient memory to construct cntbt rebuild transaction.\n"));
 
-			lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		}
-	}
+	/* Add all observed cntbt records. */
+	cur = libxfs_allocbt_stage_cursor(sc->mp, sc->tp,
+			&btr_cnt->newbt.afake, agno, XFS_BTNUM_CNT);
+	error = -libxfs_btree_bload(cur, &btr_cnt->bload, btr_cnt);
+	if (error)
+		do_error(
+_("Error %d while creating cntbt btree for AG %u.\n"), error, agno);
 
-	return(freeblks);
+	/* Since we're not writing the AGF yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(cur, 0);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing cntbt btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
 }
 
 /*
@@ -2157,6 +1847,27 @@ _("Insufficient memory to construct refcount cursor."));
 	free_slab_cursor(&refc_cur);
 }
 
+/* Fill the AGFL with any leftover blocks from a btree rebuilder. */
+static void
+fill_agfl(
+	struct bt_rebuild	*btr,
+	__be32			*agfl_bnos,
+	int			*i)
+{
+	struct xrep_newbt_resv	*resv, *n;
+	struct xfs_mount	*mp = btr->newbt.sc->mp;
+
+	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
+		xfs_agblock_t	bno;
+
+		bno = XFS_FSB_TO_AGBNO(mp, resv->fsbno + resv->used);
+		while (resv->used < resv->len && (*i) < libxfs_agfl_size(mp)) {
+			agfl_bnos[(*i)++] = cpu_to_be32(bno++);
+			resv->used++;
+		}
+	}
+}
+
 /*
  * build both the agf and the agfl for an agno given both
  * btree cursors.
@@ -2167,8 +1878,8 @@ static void
 build_agf_agfl(
 	struct xfs_mount	*mp,
 	xfs_agnumber_t		agno,
-	struct bt_status	*bno_bt,
-	struct bt_status	*bcnt_bt,
+	struct bt_rebuild	*btr_bno,
+	struct bt_rebuild	*btr_cnt,
 	xfs_extlen_t		freeblks,	/* # free blocks in tree */
 	int			lostblocks,	/* # blocks that will be lost */
 	struct bt_status	*rmap_bt,
@@ -2180,9 +1891,7 @@ build_agf_agfl(
 	int			i;
 	struct xfs_agfl		*agfl;
 	struct xfs_agf		*agf;
-	xfs_fsblock_t		fsb;
 	__be32			*freelist;
-	int			error;
 
 	agf_buf = libxfs_getbuf(mp->m_dev,
 			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
@@ -2209,10 +1918,14 @@ build_agf_agfl(
 		agf->agf_length = cpu_to_be32(mp->m_sb.sb_dblocks -
 			(xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno);
 
-	agf->agf_roots[XFS_BTNUM_BNO] = cpu_to_be32(bno_bt->root);
-	agf->agf_levels[XFS_BTNUM_BNO] = cpu_to_be32(bno_bt->num_levels);
-	agf->agf_roots[XFS_BTNUM_CNT] = cpu_to_be32(bcnt_bt->root);
-	agf->agf_levels[XFS_BTNUM_CNT] = cpu_to_be32(bcnt_bt->num_levels);
+	agf->agf_roots[XFS_BTNUM_BNO] =
+			cpu_to_be32(btr_bno->newbt.afake.af_root);
+	agf->agf_levels[XFS_BTNUM_BNO] =
+			cpu_to_be32(btr_bno->newbt.afake.af_levels);
+	agf->agf_roots[XFS_BTNUM_CNT] =
+			cpu_to_be32(btr_cnt->newbt.afake.af_root);
+	agf->agf_levels[XFS_BTNUM_CNT] =
+			cpu_to_be32(btr_cnt->newbt.afake.af_levels);
 	agf->agf_roots[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->root);
 	agf->agf_levels[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->num_levels);
 	agf->agf_freeblks = cpu_to_be32(freeblks);
@@ -2232,9 +1945,8 @@ build_agf_agfl(
 		 * Don't count the root blocks as they are already
 		 * accounted for.
 		 */
-		blks = (bno_bt->num_tot_blocks - bno_bt->num_free_blocks) +
-			(bcnt_bt->num_tot_blocks - bcnt_bt->num_free_blocks) -
-			2;
+		blks = btr_bno->newbt.afake.af_blocks +
+			btr_cnt->newbt.afake.af_blocks - 2;
 		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 			blks += rmap_bt->num_tot_blocks - rmap_bt->num_free_blocks - 1;
 		agf->agf_btreeblks = cpu_to_be32(blks);
@@ -2272,49 +1984,14 @@ build_agf_agfl(
 			agfl->agfl_bno[i] = cpu_to_be32(NULLAGBLOCK);
 	}
 	freelist = XFS_BUF_TO_AGFL_BNO(mp, agfl_buf);
+	i = 0;
 
-	/*
-	 * do we have left-over blocks in the btree cursors that should
-	 * be used to fill the AGFL?
-	 */
-	if (bno_bt->num_free_blocks > 0 || bcnt_bt->num_free_blocks > 0)  {
-		/*
-		 * yes, now grab as many blocks as we can
-		 */
-		i = 0;
-		while (bno_bt->num_free_blocks > 0 && i < libxfs_agfl_size(mp))
-		{
-			freelist[i] = cpu_to_be32(
-					get_next_blockaddr(agno, 0, bno_bt));
-			i++;
-		}
-
-		while (bcnt_bt->num_free_blocks > 0 && i < libxfs_agfl_size(mp))
-		{
-			freelist[i] = cpu_to_be32(
-					get_next_blockaddr(agno, 0, bcnt_bt));
-			i++;
-		}
-		/*
-		 * now throw the rest of the blocks away and complain
-		 */
-		while (bno_bt->num_free_blocks > 0) {
-			fsb = XFS_AGB_TO_FSB(mp, agno,
-					get_next_blockaddr(agno, 0, bno_bt));
-			error = slab_add(lost_fsb, &fsb);
-			if (error)
-				do_error(
-_("Insufficient memory saving lost blocks.\n"));
-		}
-		while (bcnt_bt->num_free_blocks > 0) {
-			fsb = XFS_AGB_TO_FSB(mp, agno,
-					get_next_blockaddr(agno, 0, bcnt_bt));
-			error = slab_add(lost_fsb, &fsb);
-			if (error)
-				do_error(
-_("Insufficient memory saving lost blocks.\n"));
-		}
+	/* Fill the AGFL with leftover blocks or save them for later. */
+	fill_agfl(btr_bno, freelist, &i);
+	fill_agfl(btr_cnt, freelist, &i);
 
+	/* Set the AGF counters for the AGFL. */
+	if (i > 0) {
 		agf->agf_flfirst = 0;
 		agf->agf_fllast = cpu_to_be32(i - 1);
 		agf->agf_flcount = cpu_to_be32(i);
@@ -2409,8 +2086,8 @@ phase5_func(
 	uint64_t		num_free_inos;
 	uint64_t		finobt_num_inos;
 	uint64_t		finobt_num_free_inos;
-	bt_status_t		bno_btree_curs;
-	bt_status_t		bcnt_btree_curs;
+	struct bt_rebuild	btr_bno;
+	struct bt_rebuild	btr_cnt;
 	bt_status_t		ino_btree_curs;
 	bt_status_t		fino_btree_curs;
 	bt_status_t		rmap_btree_curs;
@@ -2418,9 +2095,7 @@ phase5_func(
 	int			extra_blocks = 0;
 	uint			num_freeblocks;
 	xfs_extlen_t		freeblks1;
-#ifdef DEBUG
 	xfs_extlen_t		freeblks2;
-#endif
 	xfs_agblock_t		num_extents;
 
 	if (verbose)
@@ -2429,7 +2104,7 @@ phase5_func(
 	/*
 	 * build up incore bno and bcnt extent btrees
 	 */
-	num_extents = mk_incore_fstree(mp, agno);
+	num_extents = mk_incore_fstree(mp, agno, &num_freeblocks);
 
 #ifdef XR_BLD_FREE_TRACE
 	fprintf(stderr, "# of bno extents is %d\n",
@@ -2508,8 +2183,8 @@ phase5_func(
 	/*
 	 * track blocks that we might really lose
 	 */
-	extra_blocks = calculate_freespace_cursor(mp, agno,
-				&num_extents, &bno_btree_curs);
+	init_freespace_cursors(&sc, agno, num_freeblocks, &num_extents,
+			&extra_blocks, &btr_bno, &btr_cnt);
 
 	/*
 	 * freespace btrees live in the "free space" but
@@ -2527,13 +2202,6 @@ phase5_func(
 	if (extra_blocks > 0)
 		sb_fdblocks_ag[agno] -= extra_blocks;
 
-	bcnt_btree_curs = bno_btree_curs;
-
-	bno_btree_curs.owner = XFS_RMAP_OWN_AG;
-	bcnt_btree_curs.owner = XFS_RMAP_OWN_AG;
-	setup_cursor(mp, agno, &bno_btree_curs);
-	setup_cursor(mp, agno, &bcnt_btree_curs);
-
 #ifdef XR_BLD_FREE_TRACE
 	fprintf(stderr, "# of bno extents is %d\n",
 			count_bno_extents(agno));
@@ -2541,25 +2209,13 @@ phase5_func(
 			count_bcnt_extents(agno));
 #endif
 
-	/*
-	 * now rebuild the freespace trees
-	 */
-	freeblks1 = build_freespace_tree(mp, agno,
-				&bno_btree_curs, XFS_BTNUM_BNO);
+	/* Rebuild the freespace btrees. */
+	build_bnobt(&sc, agno, &btr_bno, &freeblks1);
+	build_cntbt(&sc, agno, &btr_cnt, &freeblks2);
+
 #ifdef XR_BLD_FREE_TRACE
-	fprintf(stderr, "# of free blocks == %d\n", freeblks1);
-#endif
-	write_cursor(&bno_btree_curs);
-
-#ifdef DEBUG
-	freeblks2 = build_freespace_tree(mp, agno,
-				&bcnt_btree_curs, XFS_BTNUM_CNT);
-#else
-	(void) build_freespace_tree(mp, agno,
-				&bcnt_btree_curs, XFS_BTNUM_CNT);
+	fprintf(stderr, "# of free blocks == %d/%d\n", freeblks1, freeblks2);
 #endif
-	write_cursor(&bcnt_btree_curs);
-
 	ASSERT(freeblks1 == freeblks2);
 
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
@@ -2577,9 +2233,9 @@ phase5_func(
 	/*
 	 * set up agf and agfl
 	 */
-	build_agf_agfl(mp, agno, &bno_btree_curs,
-			&bcnt_btree_curs, freeblks1, extra_blocks,
+	build_agf_agfl(mp, agno, &btr_bno, &btr_cnt, freeblks1, extra_blocks,
 			&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
+
 	/*
 	 * build inode allocation tree.
 	 */
@@ -2603,15 +2259,14 @@ phase5_func(
 	/*
 	 * tear down cursors
 	 */
-	finish_cursor(&bno_btree_curs);
-	finish_cursor(&ino_btree_curs);
+	finish_rebuild(mp, &btr_bno, lost_fsb);
+	finish_rebuild(mp, &btr_cnt, lost_fsb);
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 		finish_cursor(&rmap_btree_curs);
 	if (xfs_sb_version_hasreflink(&mp->m_sb))
 		finish_cursor(&refcnt_btree_curs);
 	if (xfs_sb_version_hasfinobt(&mp->m_sb))
 		finish_cursor(&fino_btree_curs);
-	finish_cursor(&bcnt_btree_curs);
 
 	/*
 	 * release the incore per-AG bno/bcnt trees so


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 5/9] xfs_repair: rebuild inode btrees with bulk loader
  2020-01-01  1:21 [PATCH v2 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
                   ` (3 preceding siblings ...)
  2020-01-01  1:21 ` [PATCH 4/9] xfs_repair: rebuild free space btrees with bulk loader Darrick J. Wong
@ 2020-01-01  1:21 ` Darrick J. Wong
  2020-01-01  1:22 ` [PATCH 6/9] xfs_repair: rebuild reverse mapping " Darrick J. Wong
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 20+ messages in thread
From: Darrick J. Wong @ 2020-01-01  1:21 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use the btree bulk loading functions to rebuild the inode btrees
and drop the open-coded implementation.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
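
For reviewers new to the btree staging API, every rebuild function in
this series follows the same two-pass sequence, sketched below with the
error checks trimmed; "nrecs" stands in for whatever record count the
caller tallied from the incore trees:

	struct xfs_btree_cur	*cur;
	int			error;

	/* Pass 1: compute the new btree geometry to reserve blocks. */
	cur = libxfs_inobt_stage_cursor(sc->mp, sc->tp,
			&btr->newbt.afake, agno, XFS_BTNUM_INO);
	error = -libxfs_btree_bload_compute_geometry(cur, &btr->bload,
			nrecs);
	libxfs_btree_del_cursor(cur, error);

	/* Pass 2: replay the incore records through get_data. */
	btr->bload.get_data = get_inobt_data;
	btr->bload.alloc_block = rebuild_alloc_block;
	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
	cur = libxfs_inobt_stage_cursor(sc->mp, sc->tp,
			&btr->newbt.afake, agno, XFS_BTNUM_INO);
	error = -libxfs_btree_bload(cur, &btr->bload, btr);
	libxfs_btree_del_cursor(cur, 0);
	error = -libxfs_trans_commit(sc->tp);

The finished tree's root and height land in btr->newbt.afake
(af_root/af_levels), which build_agi() copies into the on-disk AGI.
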
 libxfs/libxfs_api_defs.h |    1 
 repair/phase5.c          |  607 +++++++++++++++++-----------------------------
 2 files changed, 227 insertions(+), 381 deletions(-)


diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 60dc9297..468503c6 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -181,5 +181,6 @@
 #define xfs_btree_bload_compute_geometry libxfs_btree_bload_compute_geometry
 #define xfs_btree_bload			libxfs_btree_bload
 #define xfs_allocbt_stage_cursor	libxfs_allocbt_stage_cursor
+#define xfs_inobt_stage_cursor		libxfs_inobt_stage_cursor
 
 #endif /* __LIBXFS_API_DEFS_H__ */
diff --git a/repair/phase5.c b/repair/phase5.c
index 2421c4bc..1285527a 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -75,6 +75,10 @@ struct bt_rebuild {
 			struct extent_tree_node	*bno_rec;
 			xfs_agblock_t		*freeblks;
 		};
+		struct {
+			struct ino_tree_node	*ino_rec;
+			struct agi_stat		*agi_stat;
+		};
 	};
 };
 
@@ -764,48 +768,40 @@ _("Error %d while writing cntbt btree for AG %u.\n"), error, agno);
 	sc->tp = NULL;
 }
 
-/*
- * XXX(hch): any reason we don't just look at mp->m_inobt_mxr?
- */
-#define XR_INOBT_BLOCK_MAXRECS(mp, level) \
-			libxfs_inobt_maxrecs((mp), (mp)->m_sb.sb_blocksize, \
-						(level) == 0)
+/* Inode Btrees */
 
-/*
- * we don't have to worry here about how chewing up free extents
- * may perturb things because inode tree building happens before
- * freespace tree building.
- */
+/* Initialize both inode btree cursors as needed. */
 static void
-init_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
-		uint64_t *num_inos, uint64_t *num_free_inos, int finobt)
+init_ino_cursors(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	unsigned int		free_space,
+	uint64_t		*num_inos,
+	uint64_t		*num_free_inos,
+	struct bt_rebuild	*btr_ino,
+	struct bt_rebuild	*btr_fino)
 {
-	uint64_t		ninos;
-	uint64_t		nfinos;
-	int			rec_nfinos;
-	int			rec_ninos;
-	ino_tree_node_t		*ino_rec;
-	int			num_recs;
-	int			level;
-	bt_stat_level_t		*lptr;
-	bt_stat_level_t		*p_lptr;
-	xfs_extlen_t		blocks_allocated;
-	int			i;
+	struct xfs_btree_cur	*cur;
+	struct ino_tree_node	*ino_rec;
+	unsigned int		ino_recs = 0;
+	unsigned int		fino_recs = 0;
+	bool			finobt;
+	int			error;
 
-	*num_inos = *num_free_inos = 0;
-	ninos = nfinos = 0;
+	finobt = xfs_sb_version_hasfinobt(&sc->mp->m_sb);
+	init_rebuild(sc, &XFS_RMAP_OINFO_INOBT, free_space, btr_ino);
+	init_rebuild(sc, &XFS_RMAP_OINFO_INOBT, free_space, btr_fino);
 
-	lptr = &btree_curs->level[0];
-	btree_curs->init = 1;
-	btree_curs->owner = XFS_RMAP_OWN_INOBT;
+	/* Compute inode statistics. */
+	*num_free_inos = 0;
+	*num_inos = 0;
+	for (ino_rec = findfirst_inode_rec(agno);
+	     ino_rec != NULL;
+	     ino_rec = next_ino_rec(ino_rec))  {
+		unsigned int	rec_ninos = 0;
+		unsigned int	rec_nfinos = 0;
+		int		i;
 
-	/*
-	 * build up statistics
-	 */
-	ino_rec = findfirst_inode_rec(agno);
-	for (num_recs = 0; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec))  {
-		rec_ninos = 0;
-		rec_nfinos = 0;
 		for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
 			ASSERT(is_inode_confirmed(ino_rec, i));
 			/*
@@ -819,168 +815,222 @@ init_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
 			rec_ninos++;
 		}
 
-		/*
-		 * finobt only considers records with free inodes
-		 */
-		if (finobt && !rec_nfinos)
-			continue;
+		*num_free_inos += rec_nfinos;
+		*num_inos += rec_ninos;
+		ino_recs++;
 
-		nfinos += rec_nfinos;
-		ninos += rec_ninos;
-		num_recs++;
+		/* finobt only considers records with free inodes */
+		if (rec_nfinos)
+			fino_recs++;
 	}
 
-	if (num_recs == 0) {
-		/*
-		 * easy corner-case -- no inode records
-		 */
-		lptr->num_blocks = 1;
-		lptr->modulo = 0;
-		lptr->num_recs_pb = 0;
-		lptr->num_recs_tot = 0;
-
-		btree_curs->num_levels = 1;
-		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
+	/* Compute how many inobt blocks we'll need. */
+	cur = libxfs_inobt_stage_cursor(sc->mp, sc->tp,
+			&btr_ino->newbt.afake, agno, XFS_BTNUM_INO);
+	error = -libxfs_btree_bload_compute_geometry(cur, &btr_ino->bload,
+			ino_recs);
+	if (error)
+		do_error(
+_("Unable to compute inode btree geometry, error %d.\n"), error);
+	libxfs_btree_del_cursor(cur, error);
 
-		setup_cursor(mp, agno, btree_curs);
+	setup_rebuild(sc->mp, agno, btr_ino, btr_ino->bload.nr_blocks);
 
+	if (!finobt)
 		return;
-	}
 
-	blocks_allocated = lptr->num_blocks = howmany(num_recs,
-					XR_INOBT_BLOCK_MAXRECS(mp, 0));
+	/* Compute how many finobt blocks we'll need. */
+	cur = libxfs_inobt_stage_cursor(sc->mp, sc->tp,
+			&btr_fino->newbt.afake, agno, XFS_BTNUM_FINO);
+	error = -libxfs_btree_bload_compute_geometry(cur, &btr_fino->bload,
+			fino_recs);
+	if (error)
+		do_error(
+_("Unable to compute free inode btree geometry, error %d.\n"), error);
+	libxfs_btree_del_cursor(cur, error);
 
-	lptr->modulo = num_recs % lptr->num_blocks;
-	lptr->num_recs_pb = num_recs / lptr->num_blocks;
-	lptr->num_recs_tot = num_recs;
-	level = 1;
+	setup_rebuild(sc->mp, agno, btr_fino, btr_fino->bload.nr_blocks);
+}
 
-	if (lptr->num_blocks > 1)  {
-		for (; btree_curs->level[level-1].num_blocks > 1
-				&& level < XFS_BTREE_MAXLEVELS;
-				level++)  {
-			lptr = &btree_curs->level[level];
-			p_lptr = &btree_curs->level[level - 1];
-			lptr->num_blocks = howmany(p_lptr->num_blocks,
-				XR_INOBT_BLOCK_MAXRECS(mp, level));
-			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
-			lptr->num_recs_pb = p_lptr->num_blocks
-					/ lptr->num_blocks;
-			lptr->num_recs_tot = p_lptr->num_blocks;
+/* Copy one incore inode record into the inobt cursor. */
+static void
+get_inode_data(
+	struct xfs_btree_cur		*cur,
+	struct ino_tree_node		*ino_rec,
+	struct agi_stat			*agi_stat)
+{
+	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
+	int				inocnt = 0;
+	int				finocnt = 0;
+	int				k;
 
-			blocks_allocated += lptr->num_blocks;
-		}
+	irec->ir_startino = ino_rec->ino_startnum;
+	irec->ir_free = ino_rec->ir_free;
+
+	for (k = 0; k < sizeof(xfs_inofree_t) * NBBY; k++)  {
+		ASSERT(is_inode_confirmed(ino_rec, k));
+
+		if (is_inode_sparse(ino_rec, k))
+			continue;
+		if (is_inode_free(ino_rec, k))
+			finocnt++;
+		inocnt++;
 	}
-	ASSERT(lptr->num_blocks == 1);
-	btree_curs->num_levels = level;
 
-	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
-			= blocks_allocated;
+	irec->ir_count = inocnt;
+	irec->ir_freecount = finocnt;
 
-	setup_cursor(mp, agno, btree_curs);
+	if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+		uint64_t		sparse;
+		int			spmask;
+		uint16_t		holemask;
+
+		/*
+		 * Convert the 64-bit in-core sparse inode state to the
+		 * 16-bit on-disk holemask.
+		 */
+		holemask = 0;
+		spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
+		sparse = ino_rec->ir_sparse;
+		for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
+			if (sparse & spmask) {
+				ASSERT((sparse & spmask) == spmask);
+				holemask |= (1 << k);
+			} else
+				ASSERT((sparse & spmask) == 0);
+			sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
+		}
 
-	*num_inos = ninos;
-	*num_free_inos = nfinos;
+		irec->ir_holemask = holemask;
+	} else {
+		irec->ir_holemask = 0;
+	}
 
-	return;
+	if (!agi_stat)
+		return;
+
+	if (agi_stat->first_agino == NULLAGINO)
+		agi_stat->first_agino = ino_rec->ino_startnum;
+	agi_stat->freecount += finocnt;
+	agi_stat->count += inocnt;
 }
 
-static void
-prop_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
-	xfs_btnum_t btnum, xfs_agino_t startino, int level)
+/* Grab one inobt record. */
+static int
+get_inobt_data(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
 {
-	struct xfs_btree_block	*bt_hdr;
-	xfs_inobt_key_t		*bt_key;
-	xfs_inobt_ptr_t		*bt_ptr;
-	xfs_agblock_t		agbno;
-	bt_stat_level_t		*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
+	struct bt_rebuild		*rebuild = priv;
 
-	level++;
+	get_inode_data(cur, rebuild->ino_rec, rebuild->agi_stat);
+	rebuild->ino_rec = next_ino_rec(rebuild->ino_rec);
+	return 0;
+}
 
-	if (level >= btree_curs->num_levels)
-		return;
+/* Rebuild an inobt btree. */
+static void
+build_inobt(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr_ino,
+	struct agi_stat		*agi_stat)
+{
+	struct xfs_btree_cur	*cur;
+	int			error;
 
-	lptr = &btree_curs->level[level];
-	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+	btr_ino->bload.get_data = get_inobt_data;
+	btr_ino->bload.alloc_block = rebuild_alloc_block;
+	agi_stat->count = agi_stat->freecount = 0;
+	agi_stat->first_agino = NULLAGINO;
+	btr_ino->agi_stat = agi_stat;
+	btr_ino->ino_rec = findfirst_inode_rec(agno);
 
-	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
-		/*
-		 * this only happens once to initialize the
-		 * first path up the left side of the tree
-		 * where the agbno's are already set up
-		 */
-		prop_ino_cursor(mp, agno, btree_curs, btnum, startino, level);
-	}
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
+	if (error)
+		do_error(
+_("Insufficient memory to construct inobt rebuild transaction.\n"));
 
-	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
-				lptr->num_recs_pb + (lptr->modulo > 0))  {
-		/*
-		 * write out current prev block, grab us a new block,
-		 * and set the rightsib pointer of current block
-		 */
-#ifdef XR_BLD_INO_TRACE
-		fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
-#endif
-		if (lptr->prev_agbno != NULLAGBLOCK)  {
-			ASSERT(lptr->prev_buf_p != NULL);
-			libxfs_writebuf(lptr->prev_buf_p, 0);
-		}
-		lptr->prev_agbno = lptr->agbno;;
-		lptr->prev_buf_p = lptr->buf_p;
-		agbno = get_next_blockaddr(agno, level, btree_curs);
+	/* Add all observed inobt records. */
+	cur = libxfs_inobt_stage_cursor(sc->mp, sc->tp,
+			&btr_ino->newbt.afake, agno, XFS_BTNUM_INO);
+	error = -libxfs_btree_bload(cur, &btr_ino->bload, btr_ino);
+	if (error)
+		do_error(
+_("Error %d while creating inobt btree for AG %u.\n"), error, agno);
 
-		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
+	/* Since we're not writing the AGI yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(cur, 0);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing inobt btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
+}
 
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		lptr->agbno = agbno;
+/* Grab one finobt record. */
+static int
+get_finobt_data(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
+{
+	struct bt_rebuild		*rebuild = priv;
 
-		if (lptr->modulo)
-			lptr->modulo--;
+	get_inode_data(cur, rebuild->ino_rec, NULL);
+	rebuild->ino_rec = next_free_ino_rec(rebuild->ino_rec);
+	return 0;
+}
 
-		/*
-		 * initialize block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum,
-					level, 0, agno);
+/* Rebuild a finobt btree. */
+static void
+build_finobt(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr_fino)
+{
+	struct xfs_btree_cur	*cur;
+	int			error;
 
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
+	btr_fino->bload.get_data = get_finobt_data;
+	btr_fino->bload.alloc_block = rebuild_alloc_block;
+	btr_fino->ino_rec = findfirst_free_inode_rec(agno);
 
-		/*
-		 * propagate extent record for first extent in new block up
-		 */
-		prop_ino_cursor(mp, agno, btree_curs, btnum, startino, level);
-	}
-	/*
-	 * add inode info to current block
-	 */
-	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
+	if (error)
+		do_error(
+_("Insufficient memory to construct finobt rebuild transaction.\n"));
 
-	bt_key = XFS_INOBT_KEY_ADDR(mp, bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs));
-	bt_ptr = XFS_INOBT_PTR_ADDR(mp, bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs),
-				    M_IGEO(mp)->inobt_mxr[1]);
+	/* Add all observed finobt records. */
+	cur = libxfs_inobt_stage_cursor(sc->mp, sc->tp,
+			&btr_fino->newbt.afake, agno, XFS_BTNUM_FINO);
+	error = -libxfs_btree_bload(cur, &btr_fino->bload, btr_fino);
+	if (error)
+		do_error(
+_("Error %d while creating finobt btree for AG %u.\n"), error, agno);
 
-	bt_key->ir_startino = cpu_to_be32(startino);
-	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
+	/* Since we're not writing the AGI yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(cur, 0);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing finobt btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
 }
 
 /*
  * XXX: yet more code that can be shared with mkfs, growfs.
  */
 static void
-build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
-		bt_status_t *finobt_curs, struct agi_stat *agi_stat)
+build_agi(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*ino_bt,
+	struct bt_rebuild	*fino_bt,
+	struct agi_stat		*agi_stat)
 {
-	xfs_buf_t	*agi_buf;
-	xfs_agi_t	*agi;
-	int		i;
+	struct xfs_buf		*agi_buf;
+	struct xfs_agi		*agi;
+	int			i;
 
 	agi_buf = libxfs_getbuf(mp->m_dev,
 			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
@@ -998,8 +1048,8 @@ build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
 		agi->agi_length = cpu_to_be32(mp->m_sb.sb_dblocks -
 			(xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno);
 	agi->agi_count = cpu_to_be32(agi_stat->count);
-	agi->agi_root = cpu_to_be32(btree_curs->root);
-	agi->agi_level = cpu_to_be32(btree_curs->num_levels);
+	agi->agi_root = cpu_to_be32(ino_bt->newbt.afake.af_root);
+	agi->agi_level = cpu_to_be32(ino_bt->newbt.afake.af_levels);
 	agi->agi_freecount = cpu_to_be32(agi_stat->freecount);
 	agi->agi_newino = cpu_to_be32(agi_stat->first_agino);
 	agi->agi_dirino = cpu_to_be32(NULLAGINO);
@@ -1011,192 +1061,13 @@ build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
 		platform_uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
 
 	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
-		agi->agi_free_root = cpu_to_be32(finobt_curs->root);
-		agi->agi_free_level = cpu_to_be32(finobt_curs->num_levels);
+		agi->agi_free_root = cpu_to_be32(fino_bt->newbt.afake.af_root);
+		agi->agi_free_level = cpu_to_be32(fino_bt->newbt.afake.af_levels);
 	}
 
 	libxfs_writebuf(agi_buf, 0);
 }
 
-/*
- * rebuilds an inode tree given a cursor.  We're lazy here and call
- * the routine that builds the agi
- */
-static void
-build_ino_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
-		bt_status_t *btree_curs, xfs_btnum_t btnum,
-		struct agi_stat *agi_stat)
-{
-	xfs_agnumber_t		i;
-	xfs_agblock_t		j;
-	xfs_agblock_t		agbno;
-	xfs_agino_t		first_agino;
-	struct xfs_btree_block	*bt_hdr;
-	xfs_inobt_rec_t		*bt_rec;
-	ino_tree_node_t		*ino_rec;
-	bt_stat_level_t		*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
-	xfs_agino_t		count = 0;
-	xfs_agino_t		freecount = 0;
-	int			inocnt;
-	uint8_t			finocnt;
-	int			k;
-	int			level = btree_curs->num_levels;
-	int			spmask;
-	uint64_t		sparse;
-	uint16_t		holemask;
-
-	ASSERT(btnum == XFS_BTNUM_INO || btnum == XFS_BTNUM_FINO);
-
-	for (i = 0; i < level; i++)  {
-		lptr = &btree_curs->level[i];
-
-		agbno = get_next_blockaddr(agno, i, btree_curs);
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-
-		if (i == btree_curs->num_levels - 1)
-			btree_curs->root = agbno;
-
-		lptr->agbno = agbno;
-		lptr->prev_agbno = NULLAGBLOCK;
-		lptr->prev_buf_p = NULL;
-		/*
-		 * initialize block header
-		 */
-
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum, i, 0, agno);
-	}
-
-	/*
-	 * run along leaf, setting up records.  as we have to switch
-	 * blocks, call the prop_ino_cursor routine to set up the new
-	 * pointers for the parent.  that can recurse up to the root
-	 * if required.  set the sibling pointers for leaf level here.
-	 */
-	if (btnum == XFS_BTNUM_FINO)
-		ino_rec = findfirst_free_inode_rec(agno);
-	else
-		ino_rec = findfirst_inode_rec(agno);
-
-	if (ino_rec != NULL)
-		first_agino = ino_rec->ino_startnum;
-	else
-		first_agino = NULLAGINO;
-
-	lptr = &btree_curs->level[0];
-
-	for (i = 0; i < lptr->num_blocks; i++)  {
-		/*
-		 * block initialization, lay in block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum, 0, 0, agno);
-
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-		bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
-							(lptr->modulo > 0));
-
-		if (lptr->modulo > 0)
-			lptr->modulo--;
-
-		if (lptr->num_recs_pb > 0)
-			prop_ino_cursor(mp, agno, btree_curs, btnum,
-					ino_rec->ino_startnum, 0);
-
-		bt_rec = (xfs_inobt_rec_t *)
-			  ((char *)bt_hdr + XFS_INOBT_BLOCK_LEN(mp));
-		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
-			ASSERT(ino_rec != NULL);
-			bt_rec[j].ir_startino =
-					cpu_to_be32(ino_rec->ino_startnum);
-			bt_rec[j].ir_free = cpu_to_be64(ino_rec->ir_free);
-
-			inocnt = finocnt = 0;
-			for (k = 0; k < sizeof(xfs_inofree_t)*NBBY; k++)  {
-				ASSERT(is_inode_confirmed(ino_rec, k));
-
-				if (is_inode_sparse(ino_rec, k))
-					continue;
-				if (is_inode_free(ino_rec, k))
-					finocnt++;
-				inocnt++;
-			}
-
-			/*
-			 * Set the freecount and check whether we need to update
-			 * the sparse format fields. Otherwise, skip to the next
-			 * record.
-			 */
-			inorec_set_freecount(mp, &bt_rec[j], finocnt);
-			if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
-				goto nextrec;
-
-			/*
-			 * Convert the 64-bit in-core sparse inode state to the
-			 * 16-bit on-disk holemask.
-			 */
-			holemask = 0;
-			spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
-			sparse = ino_rec->ir_sparse;
-			for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
-				if (sparse & spmask) {
-					ASSERT((sparse & spmask) == spmask);
-					holemask |= (1 << k);
-				} else
-					ASSERT((sparse & spmask) == 0);
-				sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
-			}
-
-			bt_rec[j].ir_u.sp.ir_count = inocnt;
-			bt_rec[j].ir_u.sp.ir_holemask = cpu_to_be16(holemask);
-
-nextrec:
-			freecount += finocnt;
-			count += inocnt;
-
-			if (btnum == XFS_BTNUM_FINO)
-				ino_rec = next_free_ino_rec(ino_rec);
-			else
-				ino_rec = next_ino_rec(ino_rec);
-		}
-
-		if (ino_rec != NULL)  {
-			/*
-			 * get next leaf level block
-			 */
-			if (lptr->prev_buf_p != NULL)  {
-#ifdef XR_BLD_INO_TRACE
-				fprintf(stderr, "writing inobt agbno %u\n",
-					lptr->prev_agbno);
-#endif
-				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
-				libxfs_writebuf(lptr->prev_buf_p, 0);
-			}
-			lptr->prev_buf_p = lptr->buf_p;
-			lptr->prev_agbno = lptr->agbno;
-			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
-			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
-
-			lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		}
-	}
-
-	if (agi_stat) {
-		agi_stat->first_agino = first_agino;
-		agi_stat->count = count;
-		agi_stat->freecount = freecount;
-	}
-}
-
 /* rebuild the rmap tree */
 
 /*
@@ -2082,14 +1953,10 @@ phase5_func(
 {
 	struct repair_ctx	sc = { .mp = mp, };
 	struct agi_stat		agi_stat = {0,};
-	uint64_t		num_inos;
-	uint64_t		num_free_inos;
-	uint64_t		finobt_num_inos;
-	uint64_t		finobt_num_free_inos;
 	struct bt_rebuild	btr_bno;
 	struct bt_rebuild	btr_cnt;
-	bt_status_t		ino_btree_curs;
-	bt_status_t		fino_btree_curs;
+	struct bt_rebuild	btr_ino;
+	struct bt_rebuild	btr_fino;
 	bt_status_t		rmap_btree_curs;
 	bt_status_t		refcnt_btree_curs;
 	int			extra_blocks = 0;
@@ -2126,21 +1993,8 @@ phase5_func(
 			agno);
 	}
 
-	/*
-	 * ok, now set up the btree cursors for the
-	 * on-disk btrees (includs pre-allocating all
-	 * required blocks for the trees themselves)
-	 */
-	init_ino_cursor(mp, agno, &ino_btree_curs, &num_inos,
-			&num_free_inos, 0);
-
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
-		init_ino_cursor(mp, agno, &fino_btree_curs,
-				&finobt_num_inos, &finobt_num_free_inos,
-				1);
-
-	sb_icount_ag[agno] += num_inos;
-	sb_ifree_ag[agno] += num_free_inos;
+	init_ino_cursors(&sc, agno, num_freeblocks, &sb_icount_ag[agno],
+			&sb_ifree_ag[agno], &btr_ino, &btr_fino);
 
 	/*
 	 * Set up the btree cursors for the on-disk rmap btrees,
@@ -2237,36 +2091,27 @@ phase5_func(
 			&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
 
 	/*
-	 * build inode allocation tree.
+	 * build inode allocation trees.
 	 */
-	build_ino_tree(mp, agno, &ino_btree_curs, XFS_BTNUM_INO,
-			&agi_stat);
-	write_cursor(&ino_btree_curs);
-
-	/*
-	 * build free inode tree
-	 */
-	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
-		build_ino_tree(mp, agno, &fino_btree_curs,
-				XFS_BTNUM_FINO, NULL);
-		write_cursor(&fino_btree_curs);
-	}
+	build_inobt(&sc, agno, &btr_ino, &agi_stat);
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		build_finobt(&sc, agno, &btr_fino);
 
 	/* build the agi */
-	build_agi(mp, agno, &ino_btree_curs, &fino_btree_curs,
-		  &agi_stat);
+	build_agi(mp, agno, &btr_ino, &btr_fino, &agi_stat);
 
 	/*
 	 * tear down cursors
 	 */
 	finish_rebuild(mp, &btr_bno, lost_fsb);
 	finish_rebuild(mp, &btr_cnt, lost_fsb);
+	finish_rebuild(mp, &btr_ino, lost_fsb);
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		finish_rebuild(mp, &btr_fino, lost_fsb);
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 		finish_cursor(&rmap_btree_curs);
 	if (xfs_sb_version_hasreflink(&mp->m_sb))
 		finish_cursor(&refcnt_btree_curs);
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
-		finish_cursor(&fino_btree_curs);
 
 	/*
 	 * release the incore per-AG bno/bcnt trees so


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 6/9] xfs_repair: rebuild reverse mapping btrees with bulk loader
  2020-01-01  1:21 [PATCH v2 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
                   ` (4 preceding siblings ...)
  2020-01-01  1:21 ` [PATCH 5/9] xfs_repair: rebuild inode " Darrick J. Wong
@ 2020-01-01  1:22 ` Darrick J. Wong
  2020-01-01  1:22 ` [PATCH 7/9] xfs_repair: rebuild refcount " Darrick J. Wong
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 20+ messages in thread
From: Darrick J. Wong @ 2020-01-01  1:22 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use the btree bulk loading functions to rebuild the reverse mapping
btrees and drop the open-coded implementation.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
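
One contract worth calling out, since the rmap rebuild is the first one
here to feed records from a slab rather than an incore extent tree:
libxfs_btree_bload() invokes ->get_data exactly once for each record
counted during the geometry pass, and it lays records into the leaf
blocks in the order the callback produces them.  The iterator therefore
has to yield exactly that many records, already sorted in key order,
which the rmap slab guarantees.  That's why get_rmap_data() can be a
straight copy into the cursor's scratchpad record:

	rec = pop_slab_cursor(btr->slab_cursor);
	memcpy(&cur->bc_rec.r, rec, sizeof(struct xfs_rmap_irec));
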
 libxfs/libxfs_api_defs.h |    1 
 repair/phase5.c          |  410 ++++++++--------------------------------------
 2 files changed, 71 insertions(+), 340 deletions(-)


diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 468503c6..4fc26d15 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -182,5 +182,6 @@
 #define xfs_btree_bload			libxfs_btree_bload
 #define xfs_allocbt_stage_cursor	libxfs_allocbt_stage_cursor
 #define xfs_inobt_stage_cursor		libxfs_inobt_stage_cursor
+#define xfs_rmapbt_stage_cursor		libxfs_rmapbt_stage_cursor
 
 #endif /* __LIBXFS_API_DEFS_H__ */
diff --git a/repair/phase5.c b/repair/phase5.c
index 1285527a..ef120b5e 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -1070,359 +1070,89 @@ build_agi(
 
 /* rebuild the rmap tree */
 
-/*
- * we don't have to worry here about how chewing up free extents
- * may perturb things because rmap tree building happens before
- * freespace tree building.
- */
+/* Set up the rmap rebuild parameters. */
 static void
 init_rmapbt_cursor(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs)
-{
-	size_t			num_recs;
-	int			level;
-	struct bt_stat_level	*lptr;
-	struct bt_stat_level	*p_lptr;
-	xfs_extlen_t		blocks_allocated;
-	int			maxrecs;
-
-	if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) {
-		memset(btree_curs, 0, sizeof(struct bt_status));
-		return;
-	}
-
-	lptr = &btree_curs->level[0];
-	btree_curs->init = 1;
-	btree_curs->owner = XFS_RMAP_OWN_AG;
-
-	/*
-	 * build up statistics
-	 */
-	num_recs = rmap_record_count(mp, agno);
-	if (num_recs == 0) {
-		/*
-		 * easy corner-case -- no rmap records
-		 */
-		lptr->num_blocks = 1;
-		lptr->modulo = 0;
-		lptr->num_recs_pb = 0;
-		lptr->num_recs_tot = 0;
-
-		btree_curs->num_levels = 1;
-		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
-
-		setup_cursor(mp, agno, btree_curs);
-
-		return;
-	}
-
-	/*
-	 * Leave enough slack in the rmapbt that we can insert the
-	 * metadata AG entries without too many splits.
-	 */
-	maxrecs = mp->m_rmap_mxr[0];
-	if (num_recs > maxrecs)
-		maxrecs -= 10;
-	blocks_allocated = lptr->num_blocks = howmany(num_recs, maxrecs);
-
-	lptr->modulo = num_recs % lptr->num_blocks;
-	lptr->num_recs_pb = num_recs / lptr->num_blocks;
-	lptr->num_recs_tot = num_recs;
-	level = 1;
-
-	if (lptr->num_blocks > 1)  {
-		for (; btree_curs->level[level-1].num_blocks > 1
-				&& level < XFS_BTREE_MAXLEVELS;
-				level++)  {
-			lptr = &btree_curs->level[level];
-			p_lptr = &btree_curs->level[level - 1];
-			lptr->num_blocks = howmany(p_lptr->num_blocks,
-				mp->m_rmap_mxr[1]);
-			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
-			lptr->num_recs_pb = p_lptr->num_blocks
-					/ lptr->num_blocks;
-			lptr->num_recs_tot = p_lptr->num_blocks;
-
-			blocks_allocated += lptr->num_blocks;
-		}
-	}
-	ASSERT(lptr->num_blocks == 1);
-	btree_curs->num_levels = level;
-
-	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
-			= blocks_allocated;
-
-	setup_cursor(mp, agno, btree_curs);
-}
-
-static void
-prop_rmap_cursor(
-	struct xfs_mount	*mp,
+	struct repair_ctx	*sc,
 	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs,
-	struct xfs_rmap_irec	*rm_rec,
-	int			level)
+	unsigned int		free_space,
+	struct bt_rebuild	*btr)
 {
-	struct xfs_btree_block	*bt_hdr;
-	struct xfs_rmap_key	*bt_key;
-	xfs_rmap_ptr_t		*bt_ptr;
-	xfs_agblock_t		agbno;
-	struct bt_stat_level	*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(XFS_BTNUM_RMAP);
+	struct xfs_btree_cur	*rmap_cur;
+	int			error;
 
-	level++;
+	init_rebuild(sc, &XFS_RMAP_OINFO_AG, free_space, btr);
 
-	if (level >= btree_curs->num_levels)
+	if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb))
 		return;
 
-	lptr = &btree_curs->level[level];
-	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
-		/*
-		 * this only happens once to initialize the
-		 * first path up the left side of the tree
-		 * where the agbno's are already set up
-		 */
-		prop_rmap_cursor(mp, agno, btree_curs, rm_rec, level);
-	}
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
-				lptr->num_recs_pb + (lptr->modulo > 0))  {
-		/*
-		 * write out current prev block, grab us a new block,
-		 * and set the rightsib pointer of current block
-		 */
-#ifdef XR_BLD_INO_TRACE
-		fprintf(stderr, " rmap prop agbno %d ", lptr->prev_agbno);
-#endif
-		if (lptr->prev_agbno != NULLAGBLOCK)  {
-			ASSERT(lptr->prev_buf_p != NULL);
-			libxfs_writebuf(lptr->prev_buf_p, 0);
-		}
-		lptr->prev_agbno = lptr->agbno;
-		lptr->prev_buf_p = lptr->buf_p;
-		agbno = get_next_blockaddr(agno, level, btree_curs);
-
-		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
-
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		lptr->agbno = agbno;
-
-		if (lptr->modulo)
-			lptr->modulo--;
-
-		/*
-		 * initialize block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, XFS_BTNUM_RMAP,
-					level, 0, agno);
-
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-
-		/*
-		 * propagate extent record for first extent in new block up
-		 */
-		prop_rmap_cursor(mp, agno, btree_curs, rm_rec, level);
-	}
-	/*
-	 * add rmap info to current block
-	 */
-	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
-
-	bt_key = XFS_RMAP_KEY_ADDR(bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs));
-	bt_ptr = XFS_RMAP_PTR_ADDR(bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs),
-				    mp->m_rmap_mxr[1]);
-
-	bt_key->rm_startblock = cpu_to_be32(rm_rec->rm_startblock);
-	bt_key->rm_owner = cpu_to_be64(rm_rec->rm_owner);
-	bt_key->rm_offset = cpu_to_be64(rm_rec->rm_offset);
+	/* Compute how many blocks we'll need. */
+	rmap_cur = libxfs_rmapbt_stage_cursor(sc->mp, sc->tp,
+			&btr->newbt.afake, agno);
+	error = -libxfs_btree_bload_compute_geometry(rmap_cur, &btr->bload,
+			rmap_record_count(sc->mp, agno));
+	if (error)
+		do_error(
+_("Unable to compute rmap btree geometry, error %d.\n"), error);
+	libxfs_btree_del_cursor(rmap_cur, error);
 
-	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
+	setup_rebuild(sc->mp, agno, btr, btr->bload.nr_blocks);
 }
 
-static void
-prop_rmap_highkey(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs,
-	struct xfs_rmap_irec	*rm_highkey)
+/* Grab one rmap record. */
+static int
+get_rmap_data(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
 {
-	struct xfs_btree_block	*bt_hdr;
-	struct xfs_rmap_key	*bt_key;
-	struct bt_stat_level	*lptr;
-	struct xfs_rmap_irec	key = {0};
-	struct xfs_rmap_irec	high_key;
-	int			level;
-	int			i;
-	int			numrecs;
+	struct xfs_rmap_irec		*rmap = &cur->bc_rec.r;
+	struct xfs_rmap_irec		*rec;
+	struct bt_rebuild		*btr = priv;
 
-	high_key = *rm_highkey;
-	for (level = 1; level < btree_curs->num_levels; level++) {
-		lptr = &btree_curs->level[level];
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		numrecs = be16_to_cpu(bt_hdr->bb_numrecs);
-		bt_key = XFS_RMAP_HIGH_KEY_ADDR(bt_hdr, numrecs);
-
-		bt_key->rm_startblock = cpu_to_be32(high_key.rm_startblock);
-		bt_key->rm_owner = cpu_to_be64(high_key.rm_owner);
-		bt_key->rm_offset = cpu_to_be64(
-				libxfs_rmap_irec_offset_pack(&high_key));
-
-		for (i = 1; i <= numrecs; i++) {
-			bt_key = XFS_RMAP_HIGH_KEY_ADDR(bt_hdr, i);
-			key.rm_startblock = be32_to_cpu(bt_key->rm_startblock);
-			key.rm_owner = be64_to_cpu(bt_key->rm_owner);
-			key.rm_offset = be64_to_cpu(bt_key->rm_offset);
-			if (rmap_diffkeys(&key, &high_key) > 0)
-				high_key = key;
-		}
-	}
+	rec = pop_slab_cursor(btr->slab_cursor);
+	memcpy(rmap, rec, sizeof(struct xfs_rmap_irec));
+	return 0;
 }
 
-/*
- * rebuilds a rmap btree given a cursor.
- */
+/* Rebuild an rmap btree. */
 static void
 build_rmap_tree(
-	struct xfs_mount	*mp,
+	struct repair_ctx	*sc,
 	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs)
+	struct bt_rebuild	*btr)
 {
-	xfs_agnumber_t		i;
-	xfs_agblock_t		j;
-	xfs_agblock_t		agbno;
-	struct xfs_btree_block	*bt_hdr;
-	struct xfs_rmap_irec	*rm_rec;
-	struct xfs_slab_cursor	*rmap_cur;
-	struct xfs_rmap_rec	*bt_rec;
-	struct xfs_rmap_irec	highest_key = {0};
-	struct xfs_rmap_irec	hi_key = {0};
-	struct bt_stat_level	*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(XFS_BTNUM_RMAP);
-	int			numrecs;
-	int			level = btree_curs->num_levels;
+	struct xfs_btree_cur	*rmap_cur;
 	int			error;
 
-	highest_key.rm_flags = 0;
-	for (i = 0; i < level; i++)  {
-		lptr = &btree_curs->level[i];
-
-		agbno = get_next_blockaddr(agno, i, btree_curs);
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-
-		if (i == btree_curs->num_levels - 1)
-			btree_curs->root = agbno;
-
-		lptr->agbno = agbno;
-		lptr->prev_agbno = NULLAGBLOCK;
-		lptr->prev_buf_p = NULL;
-		/*
-		 * initialize block header
-		 */
+	btr->bload.get_data = get_rmap_data;
+	btr->bload.alloc_block = rebuild_alloc_block;
 
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, XFS_BTNUM_RMAP,
-					i, 0, agno);
-	}
-
-	/*
-	 * run along leaf, setting up records.  as we have to switch
-	 * blocks, call the prop_rmap_cursor routine to set up the new
-	 * pointers for the parent.  that can recurse up to the root
-	 * if required.  set the sibling pointers for leaf level here.
-	 */
-	error = rmap_init_cursor(agno, &rmap_cur);
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
 	if (error)
 		do_error(
-_("Insufficient memory to construct reverse-map cursor."));
-	rm_rec = pop_slab_cursor(rmap_cur);
-	lptr = &btree_curs->level[0];
-
-	for (i = 0; i < lptr->num_blocks; i++)  {
-		numrecs = lptr->num_recs_pb + (lptr->modulo > 0);
-		ASSERT(rm_rec != NULL || numrecs == 0);
-
-		/*
-		 * block initialization, lay in block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, XFS_BTNUM_RMAP,
-					0, 0, agno);
-
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-		bt_hdr->bb_numrecs = cpu_to_be16(numrecs);
-
-		if (lptr->modulo > 0)
-			lptr->modulo--;
-
-		if (lptr->num_recs_pb > 0) {
-			ASSERT(rm_rec != NULL);
-			prop_rmap_cursor(mp, agno, btree_curs, rm_rec, 0);
-		}
+_("Insufficient memory to construct rmap rebuild transaction.\n"));
 
-		bt_rec = (struct xfs_rmap_rec *)
-			  ((char *)bt_hdr + XFS_RMAP_BLOCK_LEN);
-		highest_key.rm_startblock = 0;
-		highest_key.rm_owner = 0;
-		highest_key.rm_offset = 0;
-		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
-			ASSERT(rm_rec != NULL);
-			bt_rec[j].rm_startblock =
-					cpu_to_be32(rm_rec->rm_startblock);
-			bt_rec[j].rm_blockcount =
-					cpu_to_be32(rm_rec->rm_blockcount);
-			bt_rec[j].rm_owner = cpu_to_be64(rm_rec->rm_owner);
-			bt_rec[j].rm_offset = cpu_to_be64(
-					libxfs_rmap_irec_offset_pack(rm_rec));
-			rmap_high_key_from_rec(rm_rec, &hi_key);
-			if (rmap_diffkeys(&hi_key, &highest_key) > 0)
-				highest_key = hi_key;
-
-			rm_rec = pop_slab_cursor(rmap_cur);
-		}
-
-		/* Now go set the parent key */
-		prop_rmap_highkey(mp, agno, btree_curs, &highest_key);
+	error = rmap_init_cursor(agno, &btr->slab_cursor);
+	if (error)
+		do_error(
+_("Insufficient memory to construct rmap cursor.\n"));
 
-		if (rm_rec != NULL)  {
-			/*
-			 * get next leaf level block
-			 */
-			if (lptr->prev_buf_p != NULL)  {
-#ifdef XR_BLD_RL_TRACE
-				fprintf(stderr, "writing rmapbt agbno %u\n",
-					lptr->prev_agbno);
-#endif
-				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
-				libxfs_writebuf(lptr->prev_buf_p, 0);
-			}
-			lptr->prev_buf_p = lptr->buf_p;
-			lptr->prev_agbno = lptr->agbno;
-			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
-			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
+	/* Add all observed rmap records. */
+	rmap_cur = libxfs_rmapbt_stage_cursor(sc->mp, sc->tp,
+			&btr->newbt.afake, agno);
+	error = -libxfs_btree_bload(rmap_cur, &btr->bload, btr);
+	if (error)
+		do_error(
+_("Error %d while creating rmap btree for AG %u.\n"), error, agno);
 
-			lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		}
-	}
-	free_slab_cursor(&rmap_cur);
+	/* Since we're not writing the AGF yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(rmap_cur, 0);
+	free_slab_cursor(&btr->slab_cursor);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing rmap btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
 }
 
 /* rebuild the refcount tree */
@@ -1753,7 +1483,7 @@ build_agf_agfl(
 	struct bt_rebuild	*btr_cnt,
 	xfs_extlen_t		freeblks,	/* # free blocks in tree */
 	int			lostblocks,	/* # blocks that will be lost */
-	struct bt_status	*rmap_bt,
+	struct bt_rebuild	*btr_rmap,
 	struct bt_status	*refcnt_bt,
 	struct xfs_slab		*lost_fsb)
 {
@@ -1797,11 +1527,12 @@ build_agf_agfl(
 			cpu_to_be32(btr_cnt->newbt.afake.af_root);
 	agf->agf_levels[XFS_BTNUM_CNT] =
 			cpu_to_be32(btr_cnt->newbt.afake.af_levels);
-	agf->agf_roots[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->root);
-	agf->agf_levels[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->num_levels);
+	agf->agf_roots[XFS_BTNUM_RMAP] =
+			cpu_to_be32(btr_rmap->newbt.afake.af_root);
+	agf->agf_levels[XFS_BTNUM_RMAP] =
+			cpu_to_be32(btr_rmap->newbt.afake.af_levels);
 	agf->agf_freeblks = cpu_to_be32(freeblks);
-	agf->agf_rmap_blocks = cpu_to_be32(rmap_bt->num_tot_blocks -
-			rmap_bt->num_free_blocks);
+	agf->agf_rmap_blocks = cpu_to_be32(btr_rmap->newbt.afake.af_blocks);
 	agf->agf_refcount_root = cpu_to_be32(refcnt_bt->root);
 	agf->agf_refcount_level = cpu_to_be32(refcnt_bt->num_levels);
 	agf->agf_refcount_blocks = cpu_to_be32(refcnt_bt->num_tot_blocks -
@@ -1819,7 +1550,7 @@ build_agf_agfl(
 		blks = btr_bno->newbt.afake.af_blocks +
 			btr_cnt->newbt.afake.af_blocks - 2;
 		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-			blks += rmap_bt->num_tot_blocks - rmap_bt->num_free_blocks - 1;
+			blks += btr_rmap->newbt.afake.af_blocks - 1;
 		agf->agf_btreeblks = cpu_to_be32(blks);
 #ifdef XR_BLD_FREE_TRACE
 		fprintf(stderr, "agf->agf_btreeblks = %u\n",
@@ -1860,6 +1591,7 @@ build_agf_agfl(
 	/* Fill the AGFL with leftover blocks or save them for later. */
 	fill_agfl(btr_bno, freelist, &i);
 	fill_agfl(btr_cnt, freelist, &i);
+	fill_agfl(btr_rmap, freelist, &i);
 
 	/* Set the AGF counters for the AGFL. */
 	if (i > 0) {
@@ -1957,7 +1689,7 @@ phase5_func(
 	struct bt_rebuild	btr_cnt;
 	struct bt_rebuild	btr_ino;
 	struct bt_rebuild	btr_fino;
-	bt_status_t		rmap_btree_curs;
+	struct bt_rebuild	btr_rmap;
 	bt_status_t		refcnt_btree_curs;
 	int			extra_blocks = 0;
 	uint			num_freeblocks;
@@ -2000,7 +1732,7 @@ phase5_func(
 	 * Set up the btree cursors for the on-disk rmap btrees,
 	 * which includes pre-allocating all required blocks.
 	 */
-	init_rmapbt_cursor(mp, agno, &rmap_btree_curs);
+	init_rmapbt_cursor(&sc, agno, num_freeblocks, &btr_rmap);
 
 	/*
 	 * Set up the btree cursors for the on-disk refcount btrees,
@@ -2073,10 +1805,8 @@ phase5_func(
 	ASSERT(freeblks1 == freeblks2);
 
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
-		build_rmap_tree(mp, agno, &rmap_btree_curs);
-		write_cursor(&rmap_btree_curs);
-		sb_fdblocks_ag[agno] += (rmap_btree_curs.num_tot_blocks -
-				rmap_btree_curs.num_free_blocks) - 1;
+		build_rmap_tree(&sc, agno, &btr_rmap);
+		sb_fdblocks_ag[agno] += btr_rmap.newbt.afake.af_blocks - 1;
 	}
 
 	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
@@ -2088,7 +1818,7 @@ phase5_func(
 	 * set up agf and agfl
 	 */
 	build_agf_agfl(mp, agno, &btr_bno, &btr_cnt, freeblks1, extra_blocks,
-			&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
+			&btr_rmap, &refcnt_btree_curs, lost_fsb);
 
 	/*
 	 * build inode allocation trees.
@@ -2109,7 +1839,7 @@ phase5_func(
 	if (xfs_sb_version_hasfinobt(&mp->m_sb))
 		finish_rebuild(mp, &btr_fino, lost_fsb);
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-		finish_cursor(&rmap_btree_curs);
+		finish_rebuild(mp, &btr_rmap, lost_fsb);
 	if (xfs_sb_version_hasreflink(&mp->m_sb))
 		finish_cursor(&refcnt_btree_curs);
 


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 7/9] xfs_repair: rebuild refcount btrees with bulk loader
  2020-01-01  1:21 [PATCH v2 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
                   ` (5 preceding siblings ...)
  2020-01-01  1:22 ` [PATCH 6/9] xfs_repair: rebuild reverse mapping " Darrick J. Wong
@ 2020-01-01  1:22 ` Darrick J. Wong
  2020-01-01  1:22 ` [PATCH 8/9] xfs_repair: remove old btree rebuild support code Darrick J. Wong
  2020-01-01  1:22 ` [PATCH 9/9] xfs_repair: track blocks lost during btree construction via extents Darrick J. Wong
  8 siblings, 0 replies; 20+ messages in thread
From: Darrick J. Wong @ 2020-01-01  1:22 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use the btree bulk loading functions to rebuild the refcount btrees
and drop the open-coded implementation.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
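
For context, the open-coded sizing logic that this patch (like the
earlier ones) deletes boiled down to the loop below, which is roughly
what libxfs_btree_bload_compute_geometry() now computes for us,
ignoring the empty-tree special case and the loader's slack factors:

	/* Leaf blocks needed to hold num_recs refcount records. */
	nblocks = howmany(num_recs, mp->m_refc_mxr[0]);
	total = nblocks;

	/* Each node level indexes the blocks of the level below it. */
	while (nblocks > 1) {
		nblocks = howmany(nblocks, mp->m_refc_mxr[1]);
		total += nblocks;
	}

	/* "total" blocks are reserved before any record is written. */
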
 libxfs/libxfs_api_defs.h |    1 
 repair/phase5.c          |  347 +++++++++-------------------------------------
 2 files changed, 72 insertions(+), 276 deletions(-)


diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 4fc26d15..72605d4d 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -183,5 +183,6 @@
 #define xfs_allocbt_stage_cursor	libxfs_allocbt_stage_cursor
 #define xfs_inobt_stage_cursor		libxfs_inobt_stage_cursor
 #define xfs_rmapbt_stage_cursor		libxfs_rmapbt_stage_cursor
+#define xfs_refcountbt_stage_cursor	libxfs_refcountbt_stage_cursor
 
 #endif /* __LIBXFS_API_DEFS_H__ */
diff --git a/repair/phase5.c b/repair/phase5.c
index ef120b5e..ee4a4563 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -1157,295 +1157,89 @@ _("Error %d while writing rmap btree for AG %u.\n"), error, agno);
 
 /* rebuild the refcount tree */
 
-/*
- * we don't have to worry here about how chewing up free extents
- * may perturb things because reflink tree building happens before
- * freespace tree building.
- */
+/* Set up the refcount rebuild parameters. */
 static void
 init_refc_cursor(
-	struct xfs_mount	*mp,
+	struct repair_ctx	*sc,
 	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs)
+	unsigned int		free_space,
+	struct bt_rebuild	*btr)
 {
-	size_t			num_recs;
-	int			level;
-	struct bt_stat_level	*lptr;
-	struct bt_stat_level	*p_lptr;
-	xfs_extlen_t		blocks_allocated;
-
-	if (!xfs_sb_version_hasreflink(&mp->m_sb)) {
-		memset(btree_curs, 0, sizeof(struct bt_status));
-		return;
-	}
-
-	lptr = &btree_curs->level[0];
-	btree_curs->init = 1;
-	btree_curs->owner = XFS_RMAP_OWN_REFC;
-
-	/*
-	 * build up statistics
-	 */
-	num_recs = refcount_record_count(mp, agno);
-	if (num_recs == 0) {
-		/*
-		 * easy corner-case -- no refcount records
-		 */
-		lptr->num_blocks = 1;
-		lptr->modulo = 0;
-		lptr->num_recs_pb = 0;
-		lptr->num_recs_tot = 0;
-
-		btree_curs->num_levels = 1;
-		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
+	struct xfs_btree_cur	*refc_cur;
+	int			error;
 
-		setup_cursor(mp, agno, btree_curs);
+	init_rebuild(sc, &XFS_RMAP_OINFO_REFC, free_space, btr);
 
+	if (!xfs_sb_version_hasreflink(&sc->mp->m_sb))
 		return;
-	}
 
-	blocks_allocated = lptr->num_blocks = howmany(num_recs,
-					mp->m_refc_mxr[0]);
-
-	lptr->modulo = num_recs % lptr->num_blocks;
-	lptr->num_recs_pb = num_recs / lptr->num_blocks;
-	lptr->num_recs_tot = num_recs;
-	level = 1;
-
-	if (lptr->num_blocks > 1)  {
-		for (; btree_curs->level[level-1].num_blocks > 1
-				&& level < XFS_BTREE_MAXLEVELS;
-				level++)  {
-			lptr = &btree_curs->level[level];
-			p_lptr = &btree_curs->level[level - 1];
-			lptr->num_blocks = howmany(p_lptr->num_blocks,
-					mp->m_refc_mxr[1]);
-			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
-			lptr->num_recs_pb = p_lptr->num_blocks
-					/ lptr->num_blocks;
-			lptr->num_recs_tot = p_lptr->num_blocks;
-
-			blocks_allocated += lptr->num_blocks;
-		}
-	}
-	ASSERT(lptr->num_blocks == 1);
-	btree_curs->num_levels = level;
-
-	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
-			= blocks_allocated;
+	/* Compute how many blocks we'll need. */
+	refc_cur = libxfs_refcountbt_stage_cursor(sc->mp, sc->tp,
+			&btr->newbt.afake, agno);
+	error = -libxfs_btree_bload_compute_geometry(refc_cur, &btr->bload,
+			refcount_record_count(sc->mp, agno));
+	if (error)
+		do_error(
+_("Unable to compute refcount btree geometry, error %d.\n"), error);
+	libxfs_btree_del_cursor(refc_cur, error);
 
-	setup_cursor(mp, agno, btree_curs);
+	setup_rebuild(sc->mp, agno, btr, btr->bload.nr_blocks);
 }
 
-static void
-prop_refc_cursor(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs,
-	xfs_agblock_t		startbno,
-	int			level)
+/* Grab one refcount record. */
+static int
+get_refcount_data(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
 {
-	struct xfs_btree_block	*bt_hdr;
-	struct xfs_refcount_key	*bt_key;
-	xfs_refcount_ptr_t	*bt_ptr;
-	xfs_agblock_t		agbno;
-	struct bt_stat_level	*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(XFS_BTNUM_REFC);
-
-	level++;
-
-	if (level >= btree_curs->num_levels)
-		return;
-
-	lptr = &btree_curs->level[level];
-	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
-		/*
-		 * this only happens once to initialize the
-		 * first path up the left side of the tree
-		 * where the agbno's are already set up
-		 */
-		prop_refc_cursor(mp, agno, btree_curs, startbno, level);
-	}
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
-				lptr->num_recs_pb + (lptr->modulo > 0))  {
-		/*
-		 * write out current prev block, grab us a new block,
-		 * and set the rightsib pointer of current block
-		 */
-#ifdef XR_BLD_INO_TRACE
-		fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
-#endif
-		if (lptr->prev_agbno != NULLAGBLOCK)  {
-			ASSERT(lptr->prev_buf_p != NULL);
-			libxfs_writebuf(lptr->prev_buf_p, 0);
-		}
-		lptr->prev_agbno = lptr->agbno;
-		lptr->prev_buf_p = lptr->buf_p;
-		agbno = get_next_blockaddr(agno, level, btree_curs);
-
-		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
-
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		lptr->agbno = agbno;
-
-		if (lptr->modulo)
-			lptr->modulo--;
-
-		/*
-		 * initialize block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, XFS_BTNUM_REFC,
-					level, 0, agno);
-
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-
-		/*
-		 * propagate extent record for first extent in new block up
-		 */
-		prop_refc_cursor(mp, agno, btree_curs, startbno, level);
-	}
-	/*
-	 * add inode info to current block
-	 */
-	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
-
-	bt_key = XFS_REFCOUNT_KEY_ADDR(bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs));
-	bt_ptr = XFS_REFCOUNT_PTR_ADDR(bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs),
-				    mp->m_refc_mxr[1]);
+	struct xfs_refcount_irec	*refc = &cur->bc_rec.rc;
+	struct xfs_refcount_irec	*rec;
+	struct bt_rebuild		*btr = priv;
 
-	bt_key->rc_startblock = cpu_to_be32(startbno);
-	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
+	rec = pop_slab_cursor(btr->slab_cursor);
+	memcpy(refc, rec, sizeof(struct xfs_refcount_irec));
+	return 0;
 }
 
-/*
- * rebuilds a refcount btree given a cursor.
- */
+/* Rebuild a refcount btree. */
 static void
 build_refcount_tree(
-	struct xfs_mount	*mp,
+	struct repair_ctx	*sc,
 	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs)
+	struct bt_rebuild	*btr)
 {
-	xfs_agnumber_t		i;
-	xfs_agblock_t		j;
-	xfs_agblock_t		agbno;
-	struct xfs_btree_block	*bt_hdr;
-	struct xfs_refcount_irec	*refc_rec;
-	struct xfs_slab_cursor	*refc_cur;
-	struct xfs_refcount_rec	*bt_rec;
-	struct bt_stat_level	*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(XFS_BTNUM_REFC);
-	int			numrecs;
-	int			level = btree_curs->num_levels;
+	struct xfs_btree_cur	*refc_cur;
 	int			error;
 
-	for (i = 0; i < level; i++)  {
-		lptr = &btree_curs->level[i];
-
-		agbno = get_next_blockaddr(agno, i, btree_curs);
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-
-		if (i == btree_curs->num_levels - 1)
-			btree_curs->root = agbno;
-
-		lptr->agbno = agbno;
-		lptr->prev_agbno = NULLAGBLOCK;
-		lptr->prev_buf_p = NULL;
-		/*
-		 * initialize block header
-		 */
-
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, XFS_BTNUM_REFC,
-					i, 0, agno);
-	}
+	btr->bload.get_data = get_refcount_data;
+	btr->bload.alloc_block = rebuild_alloc_block;
 
-	/*
-	 * run along leaf, setting up records.  as we have to switch
-	 * blocks, call the prop_refc_cursor routine to set up the new
-	 * pointers for the parent.  that can recurse up to the root
-	 * if required.  set the sibling pointers for leaf level here.
-	 */
-	error = init_refcount_cursor(agno, &refc_cur);
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
 	if (error)
 		do_error(
-_("Insufficient memory to construct refcount cursor."));
-	refc_rec = pop_slab_cursor(refc_cur);
-	lptr = &btree_curs->level[0];
+_("Insufficient memory to construct refcount rebuild transaction.\n"));
 
-	for (i = 0; i < lptr->num_blocks; i++)  {
-		numrecs = lptr->num_recs_pb + (lptr->modulo > 0);
-		ASSERT(refc_rec != NULL || numrecs == 0);
+	error = init_refcount_cursor(agno, &btr->slab_cursor);
+	if (error)
+		do_error(
+_("Insufficient memory to construct refcount cursor.\n"));
 
-		/*
-		 * block initialization, lay in block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, XFS_BTNUM_REFC,
-					0, 0, agno);
-
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-		bt_hdr->bb_numrecs = cpu_to_be16(numrecs);
-
-		if (lptr->modulo > 0)
-			lptr->modulo--;
-
-		if (lptr->num_recs_pb > 0)
-			prop_refc_cursor(mp, agno, btree_curs,
-					refc_rec->rc_startblock, 0);
-
-		bt_rec = (struct xfs_refcount_rec *)
-			  ((char *)bt_hdr + XFS_REFCOUNT_BLOCK_LEN);
-		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
-			ASSERT(refc_rec != NULL);
-			bt_rec[j].rc_startblock =
-					cpu_to_be32(refc_rec->rc_startblock);
-			bt_rec[j].rc_blockcount =
-					cpu_to_be32(refc_rec->rc_blockcount);
-			bt_rec[j].rc_refcount = cpu_to_be32(refc_rec->rc_refcount);
-
-			refc_rec = pop_slab_cursor(refc_cur);
-		}
+	/* Add all observed refcount records. */
+	refc_cur = libxfs_refcountbt_stage_cursor(sc->mp, sc->tp,
+			&btr->newbt.afake, agno);
+	error = -libxfs_btree_bload(refc_cur, &btr->bload, btr);
+	if (error)
+		do_error(
+_("Error %d while creating refcount btree for AG %u.\n"), error, agno);
 
-		if (refc_rec != NULL)  {
-			/*
-			 * get next leaf level block
-			 */
-			if (lptr->prev_buf_p != NULL)  {
-#ifdef XR_BLD_RL_TRACE
-				fprintf(stderr, "writing refcntbt agbno %u\n",
-					lptr->prev_agbno);
-#endif
-				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
-				libxfs_writebuf(lptr->prev_buf_p, 0);
-			}
-			lptr->prev_buf_p = lptr->buf_p;
-			lptr->prev_agbno = lptr->agbno;
-			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
-			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
-
-			lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		}
-	}
-	free_slab_cursor(&refc_cur);
+	/* Since we're not writing the AGF yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(refc_cur, 0);
+	free_slab_cursor(&btr->slab_cursor);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing refcount btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
 }
 
 /* Fill the AGFL with any leftover bnobt rebuilder blocks. */
@@ -1484,7 +1278,7 @@ build_agf_agfl(
 	xfs_extlen_t		freeblks,	/* # free blocks in tree */
 	int			lostblocks,	/* # blocks that will be lost */
 	struct bt_rebuild	*btr_rmap,
-	struct bt_status	*refcnt_bt,
+	struct bt_rebuild	*btr_refcount,
 	struct xfs_slab		*lost_fsb)
 {
 	struct extent_tree_node	*ext_ptr;
@@ -1532,11 +1326,14 @@ build_agf_agfl(
 	agf->agf_levels[XFS_BTNUM_RMAP] =
 			cpu_to_be32(btr_rmap->newbt.afake.af_levels);
 	agf->agf_freeblks = cpu_to_be32(freeblks);
-	agf->agf_rmap_blocks = cpu_to_be32(btr_rmap->newbt.afake.af_blocks);
-	agf->agf_refcount_root = cpu_to_be32(refcnt_bt->root);
-	agf->agf_refcount_level = cpu_to_be32(refcnt_bt->num_levels);
-	agf->agf_refcount_blocks = cpu_to_be32(refcnt_bt->num_tot_blocks -
-			refcnt_bt->num_free_blocks);
+	agf->agf_rmap_blocks =
+			cpu_to_be32(btr_rmap->newbt.afake.af_blocks);
+	agf->agf_refcount_root =
+			cpu_to_be32(btr_refcount->newbt.afake.af_root);
+	agf->agf_refcount_level =
+			cpu_to_be32(btr_refcount->newbt.afake.af_levels);
+	agf->agf_refcount_blocks =
+			cpu_to_be32(btr_refcount->newbt.afake.af_blocks);
 
 	/*
 	 * Count and record the number of btree blocks consumed if required.
@@ -1690,7 +1487,7 @@ phase5_func(
 	struct bt_rebuild	btr_ino;
 	struct bt_rebuild	btr_fino;
 	struct bt_rebuild	btr_rmap;
-	bt_status_t		refcnt_btree_curs;
+	struct bt_rebuild	btr_refcount;
 	int			extra_blocks = 0;
 	uint			num_freeblocks;
 	xfs_extlen_t		freeblks1;
@@ -1738,7 +1535,7 @@ phase5_func(
 	 * Set up the btree cursors for the on-disk refcount btrees,
 	 * which includes pre-allocating all required blocks.
 	 */
-	init_refc_cursor(mp, agno, &refcnt_btree_curs);
+	init_refc_cursor(&sc, agno, num_freeblocks, &btr_refcount);
 
 	num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
 	/*
@@ -1809,16 +1606,14 @@ phase5_func(
 		sb_fdblocks_ag[agno] += btr_rmap.newbt.afake.af_blocks - 1;
 	}
 
-	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
-		build_refcount_tree(mp, agno, &refcnt_btree_curs);
-		write_cursor(&refcnt_btree_curs);
-	}
+	if (xfs_sb_version_hasreflink(&mp->m_sb))
+		build_refcount_tree(&sc, agno, &btr_refcount);
 
 	/*
 	 * set up agf and agfl
 	 */
 	build_agf_agfl(mp, agno, &btr_bno, &btr_cnt, freeblks1, extra_blocks,
-			&btr_rmap, &refcnt_btree_curs, lost_fsb);
+			&btr_rmap, &btr_refcount, lost_fsb);
 
 	/*
 	 * build inode allocation trees.
@@ -1841,7 +1636,7 @@ phase5_func(
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 		finish_rebuild(mp, &btr_rmap, lost_fsb);
 	if (xfs_sb_version_hasreflink(&mp->m_sb))
-		finish_cursor(&refcnt_btree_curs);
+		finish_rebuild(mp, &btr_refcount, lost_fsb);
 
 	/*
 	 * release the incore per-AG bno/bcnt trees so



* [PATCH 8/9] xfs_repair: remove old btree rebuild support code
  2020-01-01  1:21 [PATCH v2 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
                   ` (6 preceding siblings ...)
  2020-01-01  1:22 ` [PATCH 7/9] xfs_repair: rebuild refcount " Darrick J. Wong
@ 2020-01-01  1:22 ` Darrick J. Wong
  2020-01-01  1:22 ` [PATCH 9/9] xfs_repair: track blocks lost during btree construction via extents Darrick J. Wong
  8 siblings, 0 replies; 20+ messages in thread
From: Darrick J. Wong @ 2020-01-01  1:22 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Now that phase 5 rebuilds all of the AG btrees with the bulk loader, the
open-coded rebuild machinery isn't needed anymore, so get rid of it.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |  240 -------------------------------------------------------
 1 file changed, 240 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index ee4a4563..94fc17d8 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -20,52 +20,6 @@
 #include "rmap.h"
 #include "bload.h"
 
-/*
- * we maintain the current slice (path from root to leaf)
- * of the btree incore.  when we need a new block, we ask
- * the block allocator for the address of a block on that
- * level, map the block in, and set up the appropriate
- * pointers (child, silbing, etc.) and keys that should
- * point to the new block.
- */
-typedef struct bt_stat_level  {
-	/*
-	 * set in setup_cursor routine and maintained in the tree-building
-	 * routines
-	 */
-	xfs_buf_t		*buf_p;		/* 2 buffer pointers to ... */
-	xfs_buf_t		*prev_buf_p;
-	xfs_agblock_t		agbno;		/* current block being filled */
-	xfs_agblock_t		prev_agbno;	/* previous block */
-	/*
-	 * set in calculate/init cursor routines for each btree level
-	 */
-	int			num_recs_tot;	/* # tree recs in level */
-	int			num_blocks;	/* # tree blocks in level */
-	int			num_recs_pb;	/* num_recs_tot / num_blocks */
-	int			modulo;		/* num_recs_tot % num_blocks */
-} bt_stat_level_t;
-
-typedef struct bt_status  {
-	int			init;		/* cursor set up once? */
-	int			num_levels;	/* # of levels in btree */
-	xfs_extlen_t		num_tot_blocks;	/* # blocks alloc'ed for tree */
-	xfs_extlen_t		num_free_blocks;/* # blocks currently unused */
-
-	xfs_agblock_t		root;		/* root block */
-	/*
-	 * list of blocks to be used to set up this tree
-	 * and pointer to the first unused block on the list
-	 */
-	xfs_agblock_t		*btree_blocks;		/* block list */
-	xfs_agblock_t		*free_btree_blocks;	/* first unused block */
-	/*
-	 * per-level status info
-	 */
-	bt_stat_level_t		level[XFS_BTREE_MAXLEVELS];
-	uint64_t		owner;		/* owner */
-} bt_status_t;
-
 struct bt_rebuild {
 	struct xrep_newbt	newbt;
 	struct xfs_btree_bload	bload;
@@ -188,148 +142,6 @@ mk_incore_fstree(
 	return(num_extents);
 }
 
-static xfs_agblock_t
-get_next_blockaddr(xfs_agnumber_t agno, int level, bt_status_t *curs)
-{
-	ASSERT(curs->free_btree_blocks < curs->btree_blocks +
-						curs->num_tot_blocks);
-	ASSERT(curs->num_free_blocks > 0);
-
-	curs->num_free_blocks--;
-	return(*curs->free_btree_blocks++);
-}
-
-/*
- * set up the dynamically allocated block allocation data in the btree
- * cursor that depends on the info in the static portion of the cursor.
- * allocates space from the incore bno/bcnt extent trees and sets up
- * the first path up the left side of the tree.  Also sets up the
- * cursor pointer to the btree root.   called by init_freespace_cursor()
- * and init_ino_cursor()
- */
-static void
-setup_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *curs)
-{
-	int			j;
-	unsigned int		u;
-	xfs_extlen_t		big_extent_len;
-	xfs_agblock_t		big_extent_start;
-	extent_tree_node_t	*ext_ptr;
-	extent_tree_node_t	*bno_ext_ptr;
-	xfs_extlen_t		blocks_allocated;
-	xfs_agblock_t		*agb_ptr;
-	int			error;
-
-	/*
-	 * get the number of blocks we need to allocate, then
-	 * set up block number array, set the free block pointer
-	 * to the first block in the array, and null the array
-	 */
-	big_extent_len = curs->num_tot_blocks;
-	blocks_allocated = 0;
-
-	ASSERT(big_extent_len > 0);
-
-	if ((curs->btree_blocks = malloc(sizeof(xfs_agblock_t)
-					* big_extent_len)) == NULL)
-		do_error(_("could not set up btree block array\n"));
-
-	agb_ptr = curs->free_btree_blocks = curs->btree_blocks;
-
-	for (j = 0; j < curs->num_free_blocks; j++, agb_ptr++)
-		*agb_ptr = NULLAGBLOCK;
-
-	/*
-	 * grab the smallest extent and use it up, then get the
-	 * next smallest.  This mimics the init_*_cursor code.
-	 */
-	ext_ptr =  findfirst_bcnt_extent(agno);
-
-	agb_ptr = curs->btree_blocks;
-
-	/*
-	 * set up the free block array
-	 */
-	while (blocks_allocated < big_extent_len)  {
-		if (!ext_ptr)
-			do_error(
-_("error - not enough free space in filesystem\n"));
-		/*
-		 * use up the extent we've got
-		 */
-		for (u = 0; u < ext_ptr->ex_blockcount &&
-				blocks_allocated < big_extent_len; u++)  {
-			ASSERT(agb_ptr < curs->btree_blocks
-					+ curs->num_tot_blocks);
-			*agb_ptr++ = ext_ptr->ex_startblock + u;
-			blocks_allocated++;
-		}
-
-		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, u,
-				curs->owner);
-		if (error)
-			do_error(_("could not set up btree rmaps: %s\n"),
-				strerror(-error));
-
-		/*
-		 * if we only used part of this last extent, then we
-		 * need only to reset the extent in the extent
-		 * trees and we're done
-		 */
-		if (u < ext_ptr->ex_blockcount)  {
-			big_extent_start = ext_ptr->ex_startblock + u;
-			big_extent_len = ext_ptr->ex_blockcount - u;
-
-			ASSERT(big_extent_len > 0);
-
-			bno_ext_ptr = find_bno_extent(agno,
-						ext_ptr->ex_startblock);
-			ASSERT(bno_ext_ptr != NULL);
-			get_bno_extent(agno, bno_ext_ptr);
-			release_extent_tree_node(bno_ext_ptr);
-
-			ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
-					ext_ptr->ex_blockcount);
-			release_extent_tree_node(ext_ptr);
-#ifdef XR_BLD_FREE_TRACE
-			fprintf(stderr, "releasing extent: %u [%u %u]\n",
-				agno, ext_ptr->ex_startblock,
-				ext_ptr->ex_blockcount);
-			fprintf(stderr, "blocks_allocated = %d\n",
-				blocks_allocated);
-#endif
-
-			add_bno_extent(agno, big_extent_start, big_extent_len);
-			add_bcnt_extent(agno, big_extent_start, big_extent_len);
-
-			return;
-		}
-		/*
-		 * delete the used-up extent from both extent trees and
-		 * find next biggest extent
-		 */
-#ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, "releasing extent: %u [%u %u]\n",
-			agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
-#endif
-		bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
-		ASSERT(bno_ext_ptr != NULL);
-		get_bno_extent(agno, bno_ext_ptr);
-		release_extent_tree_node(bno_ext_ptr);
-
-		ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
-				ext_ptr->ex_blockcount);
-		ASSERT(ext_ptr != NULL);
-		release_extent_tree_node(ext_ptr);
-
-		ext_ptr = findfirst_bcnt_extent(agno);
-	}
-#ifdef XR_BLD_FREE_TRACE
-	fprintf(stderr, "blocks_allocated = %d\n",
-		blocks_allocated);
-#endif
-}
-
 /*
  * Estimate proper slack values for a btree that's being reloaded.
  *
@@ -481,34 +293,6 @@ rebuild_alloc_block(
 	return xrep_newbt_alloc_block(cur, &btr->newbt, ptr);
 }
 
-static void
-write_cursor(bt_status_t *curs)
-{
-	int i;
-
-	for (i = 0; i < curs->num_levels; i++)  {
-#if defined(XR_BLD_FREE_TRACE) || defined(XR_BLD_INO_TRACE)
-		fprintf(stderr, "writing bt block %u\n", curs->level[i].agbno);
-#endif
-		if (curs->level[i].prev_buf_p != NULL)  {
-			ASSERT(curs->level[i].prev_agbno != NULLAGBLOCK);
-#if defined(XR_BLD_FREE_TRACE) || defined(XR_BLD_INO_TRACE)
-			fprintf(stderr, "writing bt prev block %u\n",
-						curs->level[i].prev_agbno);
-#endif
-			libxfs_writebuf(curs->level[i].prev_buf_p, 0);
-		}
-		libxfs_writebuf(curs->level[i].buf_p, 0);
-	}
-}
-
-static void
-finish_cursor(bt_status_t *curs)
-{
-	ASSERT(curs->num_free_blocks == 0);
-	free(curs->btree_blocks);
-}
-
 /*
  * Scoop up leftovers from a rebuild cursor for later freeing, then free the
  * rebuild context.
@@ -537,30 +321,6 @@ _("Insufficient memory saving lost blocks.\n"));
 	xrep_newbt_destroy(&btr->newbt, 0);
 }
 
-/* Map btnum to buffer ops for the types that need it. */
-static const struct xfs_buf_ops *
-btnum_to_ops(
-	xfs_btnum_t	btnum)
-{
-	switch (btnum) {
-	case XFS_BTNUM_BNO:
-		return &xfs_bnobt_buf_ops;
-	case XFS_BTNUM_CNT:
-		return &xfs_cntbt_buf_ops;
-	case XFS_BTNUM_INO:
-		return &xfs_inobt_buf_ops;
-	case XFS_BTNUM_FINO:
-		return &xfs_finobt_buf_ops;
-	case XFS_BTNUM_RMAP:
-		return &xfs_rmapbt_buf_ops;
-	case XFS_BTNUM_REFC:
-		return &xfs_refcountbt_buf_ops;
-	default:
-		ASSERT(0);
-		return NULL;
-	}
-}
-
 /*
  * Free Space Btrees
  *



* [PATCH 9/9] xfs_repair: track blocks lost during btree construction via extents
  2020-01-01  1:21 [PATCH v2 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
                   ` (7 preceding siblings ...)
  2020-01-01  1:22 ` [PATCH 8/9] xfs_repair: remove old btree rebuild support code Darrick J. Wong
@ 2020-01-01  1:22 ` Darrick J. Wong
  8 siblings, 0 replies; 20+ messages in thread
From: Darrick J. Wong @ 2020-01-01  1:22 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use extent records (not just raw fsbs) to track blocks that were lost
during btree construction.  This makes the tracking more efficient: a
contiguous run of leftover blocks now costs a single record instead of
one slab entry per block, and inject_lost_blocks can free each run with
a single extent-free call instead of one call per block.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |   60 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 26 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index 94fc17d8..1519a372 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -20,6 +20,11 @@
 #include "rmap.h"
 #include "bload.h"
 
+struct lost_fsb {
+	xfs_fsblock_t		fsbno;
+	xfs_extlen_t		len;
+};
+
 struct bt_rebuild {
 	struct xrep_newbt	newbt;
 	struct xfs_btree_bload	bload;
@@ -301,21 +306,24 @@ static void
 finish_rebuild(
 	struct xfs_mount	*mp,
 	struct bt_rebuild	*btr,
-	struct xfs_slab		*lost_fsb)
+	struct xfs_slab		*lost_fsbs)
 {
 	struct xrep_newbt_resv	*resv, *n;
 
 	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
-		while (resv->used < resv->len) {
-			xfs_fsblock_t	fsb = resv->fsbno + resv->used;
-			int		error;
+		struct lost_fsb	lost;
+		int		error;
 
-			error = slab_add(lost_fsb, &fsb);
-			if (error)
-				do_error(
+		if (resv->used == resv->len)
+			continue;
+
+		lost.fsbno = resv->fsbno + resv->used;
+		lost.len = resv->len - resv->used;
+		error = slab_add(lost_fsbs, &lost);
+		if (error)
+			do_error(
 _("Insufficient memory saving lost blocks.\n"));
-			resv->used++;
-		}
+		resv->used = resv->len;
 	}
 
 	xrep_newbt_destroy(&btr->newbt, 0);
@@ -1039,7 +1047,7 @@ build_agf_agfl(
 	int			lostblocks,	/* # blocks that will be lost */
 	struct bt_rebuild	*btr_rmap,
 	struct bt_rebuild	*btr_refcount,
-	struct xfs_slab		*lost_fsb)
+	struct xfs_slab		*lost_fsbs)
 {
 	struct extent_tree_node	*ext_ptr;
 	struct xfs_buf		*agf_buf, *agfl_buf;
@@ -1238,7 +1246,7 @@ static void
 phase5_func(
 	struct xfs_mount	*mp,
 	xfs_agnumber_t		agno,
-	struct xfs_slab		*lost_fsb)
+	struct xfs_slab		*lost_fsbs)
 {
 	struct repair_ctx	sc = { .mp = mp, };
 	struct agi_stat		agi_stat = {0,};
@@ -1373,7 +1381,7 @@ phase5_func(
 	 * set up agf and agfl
 	 */
 	build_agf_agfl(mp, agno, &btr_bno, &btr_cnt, freeblks1, extra_blocks,
-			&btr_rmap, &btr_refcount, lost_fsb);
+			&btr_rmap, &btr_refcount, lost_fsbs);
 
 	/*
 	 * build inode allocation trees.
@@ -1388,15 +1396,15 @@ phase5_func(
 	/*
 	 * tear down cursors
 	 */
-	finish_rebuild(mp, &btr_bno, lost_fsb);
-	finish_rebuild(mp, &btr_cnt, lost_fsb);
-	finish_rebuild(mp, &btr_ino, lost_fsb);
+	finish_rebuild(mp, &btr_bno, lost_fsbs);
+	finish_rebuild(mp, &btr_cnt, lost_fsbs);
+	finish_rebuild(mp, &btr_ino, lost_fsbs);
 	if (xfs_sb_version_hasfinobt(&mp->m_sb))
-		finish_rebuild(mp, &btr_fino, lost_fsb);
+		finish_rebuild(mp, &btr_fino, lost_fsbs);
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-		finish_rebuild(mp, &btr_rmap, lost_fsb);
+		finish_rebuild(mp, &btr_rmap, lost_fsbs);
 	if (xfs_sb_version_hasreflink(&mp->m_sb))
-		finish_rebuild(mp, &btr_refcount, lost_fsb);
+		finish_rebuild(mp, &btr_refcount, lost_fsbs);
 
 	/*
 	 * release the incore per-AG bno/bcnt trees so
@@ -1416,19 +1424,19 @@ inject_lost_blocks(
 {
 	struct xfs_trans	*tp = NULL;
 	struct xfs_slab_cursor	*cur = NULL;
-	xfs_fsblock_t		*fsb;
+	struct lost_fsb		*lost;
 	int			error;
 
 	error = init_slab_cursor(lost_fsbs, NULL, &cur);
 	if (error)
 		return error;
 
-	while ((fsb = pop_slab_cursor(cur)) != NULL) {
+	while ((lost = pop_slab_cursor(cur)) != NULL) {
 		error = -libxfs_trans_alloc_rollable(mp, 16, &tp);
 		if (error)
 			goto out_cancel;
 
-		error = -libxfs_free_extent(tp, *fsb, 1,
+		error = -libxfs_free_extent(tp, lost->fsbno, lost->len,
 				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
 		if (error)
 			goto out_cancel;
@@ -1449,7 +1457,7 @@ inject_lost_blocks(
 void
 phase5(xfs_mount_t *mp)
 {
-	struct xfs_slab		*lost_fsb;
+	struct xfs_slab		*lost_fsbs;
 	xfs_agnumber_t		agno;
 	int			error;
 
@@ -1492,12 +1500,12 @@ phase5(xfs_mount_t *mp)
 	if (sb_fdblocks_ag == NULL)
 		do_error(_("cannot alloc sb_fdblocks_ag buffers\n"));
 
-	error = init_slab(&lost_fsb, sizeof(xfs_fsblock_t));
+	error = init_slab(&lost_fsbs, sizeof(struct lost_fsb));
 	if (error)
 		do_error(_("cannot alloc lost block slab\n"));
 
 	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)
-		phase5_func(mp, agno, lost_fsb);
+		phase5_func(mp, agno, lost_fsbs);
 
 	print_final_rpt();
 
@@ -1540,10 +1548,10 @@ _("unable to add AG %u reverse-mapping data to btree.\n"), agno);
 	 * Put blocks that were unnecessarily reserved for btree
 	 * reconstruction back into the filesystem free space data.
 	 */
-	error = inject_lost_blocks(mp, lost_fsb);
+	error = inject_lost_blocks(mp, lost_fsbs);
 	if (error)
 		do_error(_("Unable to reinsert lost blocks into filesystem.\n"));
-	free_slab(&lost_fsb);
+	free_slab(&lost_fsbs);
 
 	bad_ino_btree = 0;
 



* Re: [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors
  2020-05-28 15:09       ` Brian Foster
@ 2020-05-29 21:08         ` Darrick J. Wong
  0 siblings, 0 replies; 20+ messages in thread
From: Darrick J. Wong @ 2020-05-29 21:08 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Thu, May 28, 2020 at 11:09:21AM -0400, Brian Foster wrote:
> On Wed, May 27, 2020 at 03:07:33PM -0700, Darrick J. Wong wrote:
> > On Wed, May 27, 2020 at 08:18:04AM -0400, Brian Foster wrote:
> > > On Tue, May 19, 2020 at 06:51:02PM -0700, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > 
> > > > Create some new support structures and functions to assist phase5 in
> > > > using the btree bulk loader to reconstruct metadata btrees.  This is the
> > > > first step in removing the open-coded rebuilding code.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > ---
> > > >  repair/phase5.c |  239 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
> > > >  1 file changed, 218 insertions(+), 21 deletions(-)
> > > > 
> > > > 
> > > > diff --git a/repair/phase5.c b/repair/phase5.c
> > > > index 84c05a13..8f5e5f59 100644
> > > > --- a/repair/phase5.c
> > > > +++ b/repair/phase5.c
> > > > @@ -18,6 +18,7 @@
> > > >  #include "progress.h"
> > > >  #include "slab.h"
> > > >  #include "rmap.h"
> > > > +#include "bload.h"
> > > >  
> > > >  /*
> > > >   * we maintain the current slice (path from root to leaf)
> > > ...
> > > > @@ -306,6 +324,156 @@ _("error - not enough free space in filesystem\n"));
> > > >  #endif
> > > >  }
> > > >  
> > > ...
> > > > +static void
> > > > +consume_freespace(
> > > > +	xfs_agnumber_t		agno,
> > > > +	struct extent_tree_node	*ext_ptr,
> > > > +	uint32_t		len)
> > > > +{
> > > > +	struct extent_tree_node	*bno_ext_ptr;
> > > > +	xfs_agblock_t		new_start = ext_ptr->ex_startblock + len;
> > > > +	xfs_extlen_t		new_len = ext_ptr->ex_blockcount - len;
> > > > +
> > > > +	/* Delete the used-up extent from both extent trees. */
> > > > +#ifdef XR_BLD_FREE_TRACE
> > > > +	fprintf(stderr, "releasing extent: %u [%u %u]\n", agno,
> > > > +			ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
> > > > +#endif
> > > > +	bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
> > > > +	ASSERT(bno_ext_ptr != NULL);
> > > > +	get_bno_extent(agno, bno_ext_ptr);
> > > > +	release_extent_tree_node(bno_ext_ptr);
> > > > +
> > > > +	ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
> > > > +			ext_ptr->ex_blockcount);
> > > > +	release_extent_tree_node(ext_ptr);
> > > > +
> > > 
> > > Not having looked too deeply at the in-core extent tracking structures,
> > > is there any particular reason we unconditionally remove and reinsert
> > > new records each time around? Is it because we're basically changing the
> > > extent index in the tree? If so, comment please (an update to the
> > > comment below is probably fine). :)
> > 
> > Yes.  We're changing the free space tree records, and the incore bno and
> > cnt trees maintain the records in sorted order.  Therefore, if we want
> > to change a record we have to delete the record from the tree and
> > reinsert it.
> > 
> > > > +	/*
> > > > +	 * If we only used part of this last extent, then we must reinsert the
> > > > +	 * extent in the extent trees.
> > 
> > /*
> >  * If we only used part of this last extent, then we must reinsert the
> >  * extent to maintain proper sorting order.
> >  */
> > 
> > How about that?
> > 
> 
> Works for me, thanks.
> 
> > > > +	 */
> > > > +	if (new_len > 0) {
> > > > +		add_bno_extent(agno, new_start, new_len);
> > > > +		add_bcnt_extent(agno, new_start, new_len);
> > > > +	}
> > > > +}
> > > > +
> > > > +/* Reserve blocks for the new btree. */
> > > > +static void
> > > > +setup_rebuild(
> > > > +	struct xfs_mount	*mp,
> > > > +	xfs_agnumber_t		agno,
> > > > +	struct bt_rebuild	*btr,
> > > > +	uint32_t		nr_blocks)
> > > > +{
> > > > +	struct extent_tree_node	*ext_ptr;
> > > > +	uint32_t		blocks_allocated = 0;
> > > > +	uint32_t		len;
> > > > +	int			error;
> > > > +
> > > > +	while (blocks_allocated < nr_blocks)  {
> > > > +		/*
> > > > +		 * Grab the smallest extent and use it up, then get the
> > > > +		 * next smallest.  This mimics the init_*_cursor code.
> > > > +		 */
> > > > +		ext_ptr =  findfirst_bcnt_extent(agno);
> > > 
> > > Extra whitespace	  ^
> > > 
> > > > +		if (!ext_ptr)
> > > > +			do_error(
> > > > +_("error - not enough free space in filesystem\n"));
> > > > +
> > > > +		/* Use up the extent we've got. */
> > > > +		len = min(ext_ptr->ex_blockcount, nr_blocks - blocks_allocated);
> > > > +		error = xrep_newbt_add_blocks(&btr->newbt,
> > > > +				XFS_AGB_TO_FSB(mp, agno,
> > > > +					       ext_ptr->ex_startblock),
> > > > +				len);
> > > 
> > > Alignment.
> > 
> > Will fix both of these.
> > 
> > > > +		if (error)
> > > > +			do_error(_("could not set up btree reservation: %s\n"),
> > > > +				strerror(-error));
> > > > +
> > > > +		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
> > > > +				btr->newbt.oinfo.oi_owner);
> > > > +		if (error)
> > > > +			do_error(_("could not set up btree rmaps: %s\n"),
> > > > +				strerror(-error));
> > > > +
> > > > +		consume_freespace(agno, ext_ptr, len);
> > > > +		blocks_allocated += len;
> > > > +	}
> > > > +#ifdef XR_BLD_FREE_TRACE
> > > > +	fprintf(stderr, "blocks_allocated = %d\n",
> > > > +		blocks_allocated);
> > > > +#endif
> > > > +}
> > > > +
> > > > +/* Feed one of the new btree blocks to the bulk loader. */
> > > > +static int
> > > > +rebuild_claim_block(
> > > > +	struct xfs_btree_cur	*cur,
> > > > +	union xfs_btree_ptr	*ptr,
> > > > +	void			*priv)
> > > > +{
> > > > +	struct bt_rebuild	*btr = priv;
> > > > +
> > > > +	return xrep_newbt_claim_block(cur, &btr->newbt, ptr);
> > > > +}
> > > > +
> > > 
> > > Seems like an unnecessary helper, unless this grows more code in later
> > > patches..?
> > 
> > It doesn't grow any more code, but keep in mind that get_record,
> > claim_block, and iroot_size are all callbacks of xfs_btree_bload().  The
> > priv parameter passed to that function is passed unchanged to the three
> > callbacks.  The bulk load code doesn't know anything about where the
> > blocks or the records come from, so this is how both repairs will pass
> > that information to the callbacks.
> > 
> 
> Ok.
> 
> > > >  static void
> > > >  write_cursor(bt_status_t *curs)
> > > >  {
> > > ...
> > > > @@ -2287,28 +2483,29 @@ keep_fsinos(xfs_mount_t *mp)
> > > >  
> > > >  static void
> > > >  phase5_func(
> > > > -	xfs_mount_t	*mp,
> > > > -	xfs_agnumber_t	agno,
> > > > -	struct xfs_slab	*lost_fsb)
> > > > +	struct xfs_mount	*mp,
> > > > +	xfs_agnumber_t		agno,
> > > > +	struct xfs_slab		*lost_fsb)
> > > >  {
> > > > -	uint64_t	num_inos;
> > > > -	uint64_t	num_free_inos;
> > > > -	uint64_t	finobt_num_inos;
> > > > -	uint64_t	finobt_num_free_inos;
> > > > -	bt_status_t	bno_btree_curs;
> > > > -	bt_status_t	bcnt_btree_curs;
> > > > -	bt_status_t	ino_btree_curs;
> > > > -	bt_status_t	fino_btree_curs;
> > > > -	bt_status_t	rmap_btree_curs;
> > > > -	bt_status_t	refcnt_btree_curs;
> > > > -	int		extra_blocks = 0;
> > > > -	uint		num_freeblocks;
> > > > -	xfs_extlen_t	freeblks1;
> > > > +	struct repair_ctx	sc = { .mp = mp, };
> > > 
> > > I don't see any reason to add sc here when it's still unused. It's not
> > > as if a single variable is saving complexity somewhere else. I guess
> > > I'll defer to Eric on the approach wrt to the other unused warnings.
> > 
> > <shrug> I'll ask.  It seems dumb to have a prep patch that adds a bunch
> > of symbols that won't get used until the next patch, but OTOH combining
> > the two will make for a ~40K patch.
> > 
> 
> I've no strong preference either way in general (the single variable
> thing aside) as long as each patch compiles and functions correctly and
> warnings are addressed by the end of the series. I do think that if we
> keep separate patches, it should probably be documented in the commit
> log that unused infrastructure is introduced (i.e. warnings expected)
> and users are introduced in a following patch. It's usually easier to
> squash patches than to separate them, so the maintainer can always
> squash them post-review if he wants to eliminate the warnings from the
> commit history.

Ok, will do.

--D

> Brian
> 
> > > Also, what's the purpose of the rmap change below? I'm wondering if that
> > > (along with all of the indentation cleanup) should be its own patch with
> > > appropriate explanation.
> > 
> > Errk, that one definitely should be separate.
> > 
> > > Brian
> > > 
> > > > +	struct agi_stat		agi_stat = {0,};
> > > > +	uint64_t		num_inos;
> > > > +	uint64_t		num_free_inos;
> > > > +	uint64_t		finobt_num_inos;
> > > > +	uint64_t		finobt_num_free_inos;
> > > > +	bt_status_t		bno_btree_curs;
> > > > +	bt_status_t		bcnt_btree_curs;
> > > > +	bt_status_t		ino_btree_curs;
> > > > +	bt_status_t		fino_btree_curs;
> > > > +	bt_status_t		rmap_btree_curs;
> > > > +	bt_status_t		refcnt_btree_curs;
> > > > +	int			extra_blocks = 0;
> > > > +	uint			num_freeblocks;
> > > > +	xfs_extlen_t		freeblks1;
> > > >  #ifdef DEBUG
> > > > -	xfs_extlen_t	freeblks2;
> > > > +	xfs_extlen_t		freeblks2;
> > > >  #endif
> > > > -	xfs_agblock_t	num_extents;
> > > > -	struct agi_stat	agi_stat = {0,};
> > > > +	xfs_agblock_t		num_extents;
> > > >  
> > > >  	if (verbose)
> > > >  		do_log(_("        - agno = %d\n"), agno);
> > > > @@ -2516,8 +2713,8 @@ inject_lost_blocks(
> > > >  		if (error)
> > > >  			goto out_cancel;
> > > >  
> > > > -		error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG,
> > > > -					    XFS_AG_RESV_NONE);
> > > > +		error = -libxfs_free_extent(tp, *fsb, 1,
> > > > +				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
> > > >  		if (error)
> > > >  			goto out_cancel;
> > > >  
> > > > 
> > > 
> > 
> 


* Re: [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors
  2020-05-27 22:07     ` Darrick J. Wong
@ 2020-05-28 15:09       ` Brian Foster
  2020-05-29 21:08         ` Darrick J. Wong
  0 siblings, 1 reply; 20+ messages in thread
From: Brian Foster @ 2020-05-28 15:09 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Wed, May 27, 2020 at 03:07:33PM -0700, Darrick J. Wong wrote:
> On Wed, May 27, 2020 at 08:18:04AM -0400, Brian Foster wrote:
> > On Tue, May 19, 2020 at 06:51:02PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Create some new support structures and functions to assist phase5 in
> > > using the btree bulk loader to reconstruct metadata btrees.  This is the
> > > first step in removing the open-coded rebuilding code.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > >  repair/phase5.c |  239 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
> > >  1 file changed, 218 insertions(+), 21 deletions(-)
> > > 
> > > 
> > > diff --git a/repair/phase5.c b/repair/phase5.c
> > > index 84c05a13..8f5e5f59 100644
> > > --- a/repair/phase5.c
> > > +++ b/repair/phase5.c
> > > @@ -18,6 +18,7 @@
> > >  #include "progress.h"
> > >  #include "slab.h"
> > >  #include "rmap.h"
> > > +#include "bload.h"
> > >  
> > >  /*
> > >   * we maintain the current slice (path from root to leaf)
> > ...
> > > @@ -306,6 +324,156 @@ _("error - not enough free space in filesystem\n"));
> > >  #endif
> > >  }
> > >  
> > ...
> > > +static void
> > > +consume_freespace(
> > > +	xfs_agnumber_t		agno,
> > > +	struct extent_tree_node	*ext_ptr,
> > > +	uint32_t		len)
> > > +{
> > > +	struct extent_tree_node	*bno_ext_ptr;
> > > +	xfs_agblock_t		new_start = ext_ptr->ex_startblock + len;
> > > +	xfs_extlen_t		new_len = ext_ptr->ex_blockcount - len;
> > > +
> > > +	/* Delete the used-up extent from both extent trees. */
> > > +#ifdef XR_BLD_FREE_TRACE
> > > +	fprintf(stderr, "releasing extent: %u [%u %u]\n", agno,
> > > +			ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
> > > +#endif
> > > +	bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
> > > +	ASSERT(bno_ext_ptr != NULL);
> > > +	get_bno_extent(agno, bno_ext_ptr);
> > > +	release_extent_tree_node(bno_ext_ptr);
> > > +
> > > +	ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
> > > +			ext_ptr->ex_blockcount);
> > > +	release_extent_tree_node(ext_ptr);
> > > +
> > 
> > Not having looked too deeply at the in-core extent tracking structures,
> > is there any particular reason we unconditionally remove and reinsert
> > new records each time around? Is it because we're basically changing the
> > extent index in the tree? If so, comment please (an update to the
> > comment below is probably fine). :)
> 
> Yes.  We're changing the free space tree records, and the incore bno and
> cnt trees maintain the records in sorted order.  Therefore, if we want
> to change a record we have to delete the record from the tree and
> reinsert it.
> 
> > > +	/*
> > > +	 * If we only used part of this last extent, then we must reinsert the
> > > +	 * extent in the extent trees.
> 
> /*
>  * If we only used part of this last extent, then we must reinsert the
>  * extent to maintain proper sorting order.
>  */
> 
> How about that?
> 

Works for me, thanks.

> > > +	 */
> > > +	if (new_len > 0) {
> > > +		add_bno_extent(agno, new_start, new_len);
> > > +		add_bcnt_extent(agno, new_start, new_len);
> > > +	}
> > > +}
> > > +
> > > +/* Reserve blocks for the new btree. */
> > > +static void
> > > +setup_rebuild(
> > > +	struct xfs_mount	*mp,
> > > +	xfs_agnumber_t		agno,
> > > +	struct bt_rebuild	*btr,
> > > +	uint32_t		nr_blocks)
> > > +{
> > > +	struct extent_tree_node	*ext_ptr;
> > > +	uint32_t		blocks_allocated = 0;
> > > +	uint32_t		len;
> > > +	int			error;
> > > +
> > > +	while (blocks_allocated < nr_blocks)  {
> > > +		/*
> > > +		 * Grab the smallest extent and use it up, then get the
> > > +		 * next smallest.  This mimics the init_*_cursor code.
> > > +		 */
> > > +		ext_ptr =  findfirst_bcnt_extent(agno);
> > 
> > Extra whitespace	  ^
> > 
> > > +		if (!ext_ptr)
> > > +			do_error(
> > > +_("error - not enough free space in filesystem\n"));
> > > +
> > > +		/* Use up the extent we've got. */
> > > +		len = min(ext_ptr->ex_blockcount, nr_blocks - blocks_allocated);
> > > +		error = xrep_newbt_add_blocks(&btr->newbt,
> > > +				XFS_AGB_TO_FSB(mp, agno,
> > > +					       ext_ptr->ex_startblock),
> > > +				len);
> > 
> > Alignment.
> 
> Will fix both of these.
> 
> > > +		if (error)
> > > +			do_error(_("could not set up btree reservation: %s\n"),
> > > +				strerror(-error));
> > > +
> > > +		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
> > > +				btr->newbt.oinfo.oi_owner);
> > > +		if (error)
> > > +			do_error(_("could not set up btree rmaps: %s\n"),
> > > +				strerror(-error));
> > > +
> > > +		consume_freespace(agno, ext_ptr, len);
> > > +		blocks_allocated += len;
> > > +	}
> > > +#ifdef XR_BLD_FREE_TRACE
> > > +	fprintf(stderr, "blocks_allocated = %d\n",
> > > +		blocks_allocated);
> > > +#endif
> > > +}
> > > +
> > > +/* Feed one of the new btree blocks to the bulk loader. */
> > > +static int
> > > +rebuild_claim_block(
> > > +	struct xfs_btree_cur	*cur,
> > > +	union xfs_btree_ptr	*ptr,
> > > +	void			*priv)
> > > +{
> > > +	struct bt_rebuild	*btr = priv;
> > > +
> > > +	return xrep_newbt_claim_block(cur, &btr->newbt, ptr);
> > > +}
> > > +
> > 
> > Seems like an unnecessary helper, unless this grows more code in later
> > patches..?
> 
> It doesn't grow any more code, but keep in mind that get_record,
> claim_block, and iroot_size are all callbacks of xfs_btree_bload().  The
> priv parameter passed to that function is passed unchanged to the three
> callbacks.  The bulk load code doesn't know anything about where the
> blocks or the records come from, so this is how both repairs will pass
> that information to the callbacks.
> 

Ok.

> > >  static void
> > >  write_cursor(bt_status_t *curs)
> > >  {
> > ...
> > > @@ -2287,28 +2483,29 @@ keep_fsinos(xfs_mount_t *mp)
> > >  
> > >  static void
> > >  phase5_func(
> > > -	xfs_mount_t	*mp,
> > > -	xfs_agnumber_t	agno,
> > > -	struct xfs_slab	*lost_fsb)
> > > +	struct xfs_mount	*mp,
> > > +	xfs_agnumber_t		agno,
> > > +	struct xfs_slab		*lost_fsb)
> > >  {
> > > -	uint64_t	num_inos;
> > > -	uint64_t	num_free_inos;
> > > -	uint64_t	finobt_num_inos;
> > > -	uint64_t	finobt_num_free_inos;
> > > -	bt_status_t	bno_btree_curs;
> > > -	bt_status_t	bcnt_btree_curs;
> > > -	bt_status_t	ino_btree_curs;
> > > -	bt_status_t	fino_btree_curs;
> > > -	bt_status_t	rmap_btree_curs;
> > > -	bt_status_t	refcnt_btree_curs;
> > > -	int		extra_blocks = 0;
> > > -	uint		num_freeblocks;
> > > -	xfs_extlen_t	freeblks1;
> > > +	struct repair_ctx	sc = { .mp = mp, };
> > 
> > I don't see any reason to add sc here when it's still unused. It's not
> > as if a single variable is saving complexity somewhere else. I guess
> > I'll defer to Eric on the approach wrt to the other unused warnings.
> 
> <shrug> I'll ask.  It seems dumb to have a prep patch that adds a bunch
> of symbols that won't get used until the next patch, but OTOH combining
> the two will make for a ~40K patch.
> 

I've no strong preference either way in general (the single variable
thing aside) as long as each patch compiles and functions correctly and
warnings are addressed by the end of the series. I do think that if we
keep separate patches, it should probably be documented in the commit
log that unused infrastructure is introduced (i.e. warnings expected)
and users are introduced in a following patch. It's usually easier to
squash patches than to separate them, so the maintainer can always
squash them post-review if he wants to eliminate the warnings from the
commit history.

Brian

> > Also, what's the purpose of the rmap change below? I'm wondering if that
> > (along with all of the indentation cleanup) should be its own patch with
> > appropriate explanation.
> 
> Errk, that one definitely should be separate.
> 
> > Brian
> > 
> > > +	struct agi_stat		agi_stat = {0,};
> > > +	uint64_t		num_inos;
> > > +	uint64_t		num_free_inos;
> > > +	uint64_t		finobt_num_inos;
> > > +	uint64_t		finobt_num_free_inos;
> > > +	bt_status_t		bno_btree_curs;
> > > +	bt_status_t		bcnt_btree_curs;
> > > +	bt_status_t		ino_btree_curs;
> > > +	bt_status_t		fino_btree_curs;
> > > +	bt_status_t		rmap_btree_curs;
> > > +	bt_status_t		refcnt_btree_curs;
> > > +	int			extra_blocks = 0;
> > > +	uint			num_freeblocks;
> > > +	xfs_extlen_t		freeblks1;
> > >  #ifdef DEBUG
> > > -	xfs_extlen_t	freeblks2;
> > > +	xfs_extlen_t		freeblks2;
> > >  #endif
> > > -	xfs_agblock_t	num_extents;
> > > -	struct agi_stat	agi_stat = {0,};
> > > +	xfs_agblock_t		num_extents;
> > >  
> > >  	if (verbose)
> > >  		do_log(_("        - agno = %d\n"), agno);
> > > @@ -2516,8 +2713,8 @@ inject_lost_blocks(
> > >  		if (error)
> > >  			goto out_cancel;
> > >  
> > > -		error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG,
> > > -					    XFS_AG_RESV_NONE);
> > > +		error = -libxfs_free_extent(tp, *fsb, 1,
> > > +				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
> > >  		if (error)
> > >  			goto out_cancel;
> > >  
> > > 
> > 
> 



* Re: [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors
  2020-05-27 12:18   ` Brian Foster
@ 2020-05-27 22:07     ` Darrick J. Wong
  2020-05-28 15:09       ` Brian Foster
  0 siblings, 1 reply; 20+ messages in thread
From: Darrick J. Wong @ 2020-05-27 22:07 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Wed, May 27, 2020 at 08:18:04AM -0400, Brian Foster wrote:
> On Tue, May 19, 2020 at 06:51:02PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Create some new support structures and functions to assist phase5 in
> > using the btree bulk loader to reconstruct metadata btrees.  This is the
> > first step in removing the open-coded rebuilding code.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  repair/phase5.c |  239 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 218 insertions(+), 21 deletions(-)
> > 
> > 
> > diff --git a/repair/phase5.c b/repair/phase5.c
> > index 84c05a13..8f5e5f59 100644
> > --- a/repair/phase5.c
> > +++ b/repair/phase5.c
> > @@ -18,6 +18,7 @@
> >  #include "progress.h"
> >  #include "slab.h"
> >  #include "rmap.h"
> > +#include "bload.h"
> >  
> >  /*
> >   * we maintain the current slice (path from root to leaf)
> ...
> > @@ -306,6 +324,156 @@ _("error - not enough free space in filesystem\n"));
> >  #endif
> >  }
> >  
> ...
> > +static void
> > +consume_freespace(
> > +	xfs_agnumber_t		agno,
> > +	struct extent_tree_node	*ext_ptr,
> > +	uint32_t		len)
> > +{
> > +	struct extent_tree_node	*bno_ext_ptr;
> > +	xfs_agblock_t		new_start = ext_ptr->ex_startblock + len;
> > +	xfs_extlen_t		new_len = ext_ptr->ex_blockcount - len;
> > +
> > +	/* Delete the used-up extent from both extent trees. */
> > +#ifdef XR_BLD_FREE_TRACE
> > +	fprintf(stderr, "releasing extent: %u [%u %u]\n", agno,
> > +			ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
> > +#endif
> > +	bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
> > +	ASSERT(bno_ext_ptr != NULL);
> > +	get_bno_extent(agno, bno_ext_ptr);
> > +	release_extent_tree_node(bno_ext_ptr);
> > +
> > +	ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
> > +			ext_ptr->ex_blockcount);
> > +	release_extent_tree_node(ext_ptr);
> > +
> 
> Not having looked too deeply at the in-core extent tracking structures,
> is there any particular reason we unconditionally remove and reinsert
> new records each time around? Is it because we're basically changing the
> extent index in the tree? If so, comment please (an update to the
> comment below is probably fine). :)

Yes.  We're changing the free space tree records, and the incore bno and
cnt trees maintain the records in sorted order.  Therefore, if we want
to change a record we have to delete the record from the tree and
reinsert it.
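
Boiled down, that's all consume_freespace() does.  Here it is again as
a minimal sketch, with the asserts and XR_BLD_FREE_TRACE ifdefs
dropped:

static void
consume_freespace(
	xfs_agnumber_t		agno,
	struct extent_tree_node	*ext_ptr,
	uint32_t		len)
{
	struct extent_tree_node	*bno_ext_ptr;
	xfs_agblock_t		new_start = ext_ptr->ex_startblock + len;
	xfs_extlen_t		new_len = ext_ptr->ex_blockcount - len;

	/* Delete the used-up extent from the by-bno tree... */
	bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
	get_bno_extent(agno, bno_ext_ptr);
	release_extent_tree_node(bno_ext_ptr);

	/* ...and from the by-size (bcnt) tree. */
	ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
			ext_ptr->ex_blockcount);
	release_extent_tree_node(ext_ptr);

	/* Reinsert whatever we didn't use so both trees stay sorted. */
	if (new_len > 0) {
		add_bno_extent(agno, new_start, new_len);
		add_bcnt_extent(agno, new_start, new_len);
	}
}

Changing ex_blockcount in place would leave the record mispositioned in
the by-size tree, hence the unconditional round trip.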

> > +	/*
> > +	 * If we only used part of this last extent, then we must reinsert the
> > +	 * extent in the extent trees.

/*
 * If we only used part of this last extent, then we must reinsert the
 * extent to maintain proper sorting order.
 */

How about that?

> > +	 */
> > +	if (new_len > 0) {
> > +		add_bno_extent(agno, new_start, new_len);
> > +		add_bcnt_extent(agno, new_start, new_len);
> > +	}
> > +}
> > +
> > +/* Reserve blocks for the new btree. */
> > +static void
> > +setup_rebuild(
> > +	struct xfs_mount	*mp,
> > +	xfs_agnumber_t		agno,
> > +	struct bt_rebuild	*btr,
> > +	uint32_t		nr_blocks)
> > +{
> > +	struct extent_tree_node	*ext_ptr;
> > +	uint32_t		blocks_allocated = 0;
> > +	uint32_t		len;
> > +	int			error;
> > +
> > +	while (blocks_allocated < nr_blocks)  {
> > +		/*
> > +		 * Grab the smallest extent and use it up, then get the
> > +		 * next smallest.  This mimics the init_*_cursor code.
> > +		 */
> > +		ext_ptr =  findfirst_bcnt_extent(agno);
> 
> Extra whitespace	  ^
> 
> > +		if (!ext_ptr)
> > +			do_error(
> > +_("error - not enough free space in filesystem\n"));
> > +
> > +		/* Use up the extent we've got. */
> > +		len = min(ext_ptr->ex_blockcount, nr_blocks - blocks_allocated);
> > +		error = xrep_newbt_add_blocks(&btr->newbt,
> > +				XFS_AGB_TO_FSB(mp, agno,
> > +					       ext_ptr->ex_startblock),
> > +				len);
> 
> Alignment.

Will fix both of these.

> > +		if (error)
> > +			do_error(_("could not set up btree reservation: %s\n"),
> > +				strerror(-error));
> > +
> > +		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
> > +				btr->newbt.oinfo.oi_owner);
> > +		if (error)
> > +			do_error(_("could not set up btree rmaps: %s\n"),
> > +				strerror(-error));
> > +
> > +		consume_freespace(agno, ext_ptr, len);
> > +		blocks_allocated += len;
> > +	}
> > +#ifdef XR_BLD_FREE_TRACE
> > +	fprintf(stderr, "blocks_allocated = %d\n",
> > +		blocks_allocated);
> > +#endif
> > +}
> > +
> > +/* Feed one of the new btree blocks to the bulk loader. */
> > +static int
> > +rebuild_claim_block(
> > +	struct xfs_btree_cur	*cur,
> > +	union xfs_btree_ptr	*ptr,
> > +	void			*priv)
> > +{
> > +	struct bt_rebuild	*btr = priv;
> > +
> > +	return xrep_newbt_claim_block(cur, &btr->newbt, ptr);
> > +}
> > +
> 
> Seems like an unnecessary helper, unless this grows more code in later
> patches..?

It doesn't grow any more code, but keep in mind that get_record,
claim_block, and iroot_size are all callbacks of xfs_btree_bload().  The
priv parameter passed to that function is passed unchanged to the three
callbacks.  The bulk load code doesn't know anything about where the
blocks or the records come from, so this is how both repairs will pass
that information to the callbacks.
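
As a rough sketch, the refcount rebuild in a later patch wires it up
like this (load_refcount_btree and get_refcount_record are made-up
names for illustration; the v2 posting spelled the callback fields
get_data and alloc_block):

static void
load_refcount_btree(
	struct repair_ctx	*sc,
	xfs_agnumber_t		agno,
	struct bt_rebuild	*btr)
{
	struct xfs_btree_cur	*cur;
	int			error;

	/* @btr rides along as @priv and comes back in every callback. */
	btr->bload.get_record = get_refcount_record;
	btr->bload.claim_block = rebuild_claim_block;

	cur = libxfs_refcountbt_stage_cursor(sc->mp, sc->tp,
			&btr->newbt.afake, agno);
	error = -libxfs_btree_bload(cur, &btr->bload, btr);
	libxfs_btree_del_cursor(cur, error);
}

The bulk loader itself stays generic; everything the callbacks need
(the record slab cursor, the preallocated block reservations) lives in
the bt_rebuild structure they get back.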

> >  static void
> >  write_cursor(bt_status_t *curs)
> >  {
> ...
> > @@ -2287,28 +2483,29 @@ keep_fsinos(xfs_mount_t *mp)
> >  
> >  static void
> >  phase5_func(
> > -	xfs_mount_t	*mp,
> > -	xfs_agnumber_t	agno,
> > -	struct xfs_slab	*lost_fsb)
> > +	struct xfs_mount	*mp,
> > +	xfs_agnumber_t		agno,
> > +	struct xfs_slab		*lost_fsb)
> >  {
> > -	uint64_t	num_inos;
> > -	uint64_t	num_free_inos;
> > -	uint64_t	finobt_num_inos;
> > -	uint64_t	finobt_num_free_inos;
> > -	bt_status_t	bno_btree_curs;
> > -	bt_status_t	bcnt_btree_curs;
> > -	bt_status_t	ino_btree_curs;
> > -	bt_status_t	fino_btree_curs;
> > -	bt_status_t	rmap_btree_curs;
> > -	bt_status_t	refcnt_btree_curs;
> > -	int		extra_blocks = 0;
> > -	uint		num_freeblocks;
> > -	xfs_extlen_t	freeblks1;
> > +	struct repair_ctx	sc = { .mp = mp, };
> 
> I don't see any reason to add sc here when it's still unused. It's not
> as if a single variable is saving complexity somewhere else. I guess
> I'll defer to Eric on the approach wrt to the other unused warnings.

<shrug> I'll ask.  It seems dumb to have a prep patch that adds a bunch
of symbols that won't get used until the next patch, but OTOH combining
the two will make for a ~40K patch.

> Also, what's the purpose of the rmap change below? I'm wondering if that
> (along with all of the indentation cleanup) should be its own patch with
> appropriate explanation.

Errk, that one definitely should be separate.

> Brian
> 
> > +	struct agi_stat		agi_stat = {0,};
> > +	uint64_t		num_inos;
> > +	uint64_t		num_free_inos;
> > +	uint64_t		finobt_num_inos;
> > +	uint64_t		finobt_num_free_inos;
> > +	bt_status_t		bno_btree_curs;
> > +	bt_status_t		bcnt_btree_curs;
> > +	bt_status_t		ino_btree_curs;
> > +	bt_status_t		fino_btree_curs;
> > +	bt_status_t		rmap_btree_curs;
> > +	bt_status_t		refcnt_btree_curs;
> > +	int			extra_blocks = 0;
> > +	uint			num_freeblocks;
> > +	xfs_extlen_t		freeblks1;
> >  #ifdef DEBUG
> > -	xfs_extlen_t	freeblks2;
> > +	xfs_extlen_t		freeblks2;
> >  #endif
> > -	xfs_agblock_t	num_extents;
> > -	struct agi_stat	agi_stat = {0,};
> > +	xfs_agblock_t		num_extents;
> >  
> >  	if (verbose)
> >  		do_log(_("        - agno = %d\n"), agno);
> > @@ -2516,8 +2713,8 @@ inject_lost_blocks(
> >  		if (error)
> >  			goto out_cancel;
> >  
> > -		error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG,
> > -					    XFS_AG_RESV_NONE);
> > +		error = -libxfs_free_extent(tp, *fsb, 1,
> > +				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
> >  		if (error)
> >  			goto out_cancel;
> >  
> > 
> 


* Re: [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors
  2020-05-20  1:51 ` [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors Darrick J. Wong
@ 2020-05-27 12:18   ` Brian Foster
  2020-05-27 22:07     ` Darrick J. Wong
  0 siblings, 1 reply; 20+ messages in thread
From: Brian Foster @ 2020-05-27 12:18 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Tue, May 19, 2020 at 06:51:02PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Create some new support structures and functions to assist phase5 in
> using the btree bulk loader to reconstruct metadata btrees.  This is the
> first step in removing the open-coded rebuilding code.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  repair/phase5.c |  239 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 218 insertions(+), 21 deletions(-)
> 
> 
> diff --git a/repair/phase5.c b/repair/phase5.c
> index 84c05a13..8f5e5f59 100644
> --- a/repair/phase5.c
> +++ b/repair/phase5.c
> @@ -18,6 +18,7 @@
>  #include "progress.h"
>  #include "slab.h"
>  #include "rmap.h"
> +#include "bload.h"
>  
>  /*
>   * we maintain the current slice (path from root to leaf)
...
> @@ -306,6 +324,156 @@ _("error - not enough free space in filesystem\n"));
>  #endif
>  }
>  
...
> +static void
> +consume_freespace(
> +	xfs_agnumber_t		agno,
> +	struct extent_tree_node	*ext_ptr,
> +	uint32_t		len)
> +{
> +	struct extent_tree_node	*bno_ext_ptr;
> +	xfs_agblock_t		new_start = ext_ptr->ex_startblock + len;
> +	xfs_extlen_t		new_len = ext_ptr->ex_blockcount - len;
> +
> +	/* Delete the used-up extent from both extent trees. */
> +#ifdef XR_BLD_FREE_TRACE
> +	fprintf(stderr, "releasing extent: %u [%u %u]\n", agno,
> +			ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
> +#endif
> +	bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
> +	ASSERT(bno_ext_ptr != NULL);
> +	get_bno_extent(agno, bno_ext_ptr);
> +	release_extent_tree_node(bno_ext_ptr);
> +
> +	ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
> +			ext_ptr->ex_blockcount);
> +	release_extent_tree_node(ext_ptr);
> +

Not having looked too deeply at the in-core extent tracking structures,
is there any particular reason we unconditionally remove and reinsert
new records each time around? Is it because we're basically changing the
extent index in the tree? If so, comment please (an update to the
comment below is probably fine). :)

> +	/*
> +	 * If we only used part of this last extent, then we must reinsert the
> +	 * extent in the extent trees.
> +	 */
> +	if (new_len > 0) {
> +		add_bno_extent(agno, new_start, new_len);
> +		add_bcnt_extent(agno, new_start, new_len);
> +	}
> +}
> +
> +/* Reserve blocks for the new btree. */
> +static void
> +setup_rebuild(
> +	struct xfs_mount	*mp,
> +	xfs_agnumber_t		agno,
> +	struct bt_rebuild	*btr,
> +	uint32_t		nr_blocks)
> +{
> +	struct extent_tree_node	*ext_ptr;
> +	uint32_t		blocks_allocated = 0;
> +	uint32_t		len;
> +	int			error;
> +
> +	while (blocks_allocated < nr_blocks)  {
> +		/*
> +		 * Grab the smallest extent and use it up, then get the
> +		 * next smallest.  This mimics the init_*_cursor code.
> +		 */
> +		ext_ptr =  findfirst_bcnt_extent(agno);

Extra whitespace	  ^

> +		if (!ext_ptr)
> +			do_error(
> +_("error - not enough free space in filesystem\n"));
> +
> +		/* Use up the extent we've got. */
> +		len = min(ext_ptr->ex_blockcount, nr_blocks - blocks_allocated);
> +		error = xrep_newbt_add_blocks(&btr->newbt,
> +				XFS_AGB_TO_FSB(mp, agno,
> +					       ext_ptr->ex_startblock),
> +				len);

Alignment.

> +		if (error)
> +			do_error(_("could not set up btree reservation: %s\n"),
> +				strerror(-error));
> +
> +		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
> +				btr->newbt.oinfo.oi_owner);
> +		if (error)
> +			do_error(_("could not set up btree rmaps: %s\n"),
> +				strerror(-error));
> +
> +		consume_freespace(agno, ext_ptr, len);
> +		blocks_allocated += len;
> +	}
> +#ifdef XR_BLD_FREE_TRACE
> +	fprintf(stderr, "blocks_allocated = %d\n",
> +		blocks_allocated);
> +#endif
> +}
> +
> +/* Feed one of the new btree blocks to the bulk loader. */
> +static int
> +rebuild_claim_block(
> +	struct xfs_btree_cur	*cur,
> +	union xfs_btree_ptr	*ptr,
> +	void			*priv)
> +{
> +	struct bt_rebuild	*btr = priv;
> +
> +	return xrep_newbt_claim_block(cur, &btr->newbt, ptr);
> +}
> +

Seems like an unnecessary helper, unless this grows more code in later
patches..?

>  static void
>  write_cursor(bt_status_t *curs)
>  {
...
> @@ -2287,28 +2483,29 @@ keep_fsinos(xfs_mount_t *mp)
>  
>  static void
>  phase5_func(
> -	xfs_mount_t	*mp,
> -	xfs_agnumber_t	agno,
> -	struct xfs_slab	*lost_fsb)
> +	struct xfs_mount	*mp,
> +	xfs_agnumber_t		agno,
> +	struct xfs_slab		*lost_fsb)
>  {
> -	uint64_t	num_inos;
> -	uint64_t	num_free_inos;
> -	uint64_t	finobt_num_inos;
> -	uint64_t	finobt_num_free_inos;
> -	bt_status_t	bno_btree_curs;
> -	bt_status_t	bcnt_btree_curs;
> -	bt_status_t	ino_btree_curs;
> -	bt_status_t	fino_btree_curs;
> -	bt_status_t	rmap_btree_curs;
> -	bt_status_t	refcnt_btree_curs;
> -	int		extra_blocks = 0;
> -	uint		num_freeblocks;
> -	xfs_extlen_t	freeblks1;
> +	struct repair_ctx	sc = { .mp = mp, };

I don't see any reason to add sc here when it's still unused. It's not
as if a single variable is saving complexity somewhere else. I guess
I'll defer to Eric on the approach wrt to the other unused warnings.

Also, what's the purpose of the rmap change below? I'm wondering if that
(along with all of the indentation cleanup) should be its own patch with
appropriate explanation.

Brian

> +	struct agi_stat		agi_stat = {0,};
> +	uint64_t		num_inos;
> +	uint64_t		num_free_inos;
> +	uint64_t		finobt_num_inos;
> +	uint64_t		finobt_num_free_inos;
> +	bt_status_t		bno_btree_curs;
> +	bt_status_t		bcnt_btree_curs;
> +	bt_status_t		ino_btree_curs;
> +	bt_status_t		fino_btree_curs;
> +	bt_status_t		rmap_btree_curs;
> +	bt_status_t		refcnt_btree_curs;
> +	int			extra_blocks = 0;
> +	uint			num_freeblocks;
> +	xfs_extlen_t		freeblks1;
>  #ifdef DEBUG
> -	xfs_extlen_t	freeblks2;
> +	xfs_extlen_t		freeblks2;
>  #endif
> -	xfs_agblock_t	num_extents;
> -	struct agi_stat	agi_stat = {0,};
> +	xfs_agblock_t		num_extents;
>  
>  	if (verbose)
>  		do_log(_("        - agno = %d\n"), agno);
> @@ -2516,8 +2713,8 @@ inject_lost_blocks(
>  		if (error)
>  			goto out_cancel;
>  
> -		error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG,
> -					    XFS_AG_RESV_NONE);
> +		error = -libxfs_free_extent(tp, *fsb, 1,
> +				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
>  		if (error)
>  			goto out_cancel;
>  
> 


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors
  2020-05-20  1:50 [PATCH v5 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
@ 2020-05-20  1:51 ` Darrick J. Wong
  2020-05-27 12:18   ` Brian Foster
  0 siblings, 1 reply; 20+ messages in thread
From: Darrick J. Wong @ 2020-05-20  1:51 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <darrick.wong@oracle.com>

Create some new support structures and functions to assist phase5 in
using the btree bulk loader to reconstruct metadata btrees.  This is the
first step in removing the open-coded rebuilding code.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |  239 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 218 insertions(+), 21 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index 84c05a13..8f5e5f59 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -18,6 +18,7 @@
 #include "progress.h"
 #include "slab.h"
 #include "rmap.h"
+#include "bload.h"
 
 /*
  * we maintain the current slice (path from root to leaf)
@@ -65,6 +66,23 @@ typedef struct bt_status  {
 	uint64_t		owner;		/* owner */
 } bt_status_t;
 
+/* Context for rebuilding a per-AG btree. */
+struct bt_rebuild {
+	/* Fake root for staging and space preallocations. */
+	struct xrep_newbt	newbt;
+
+	/* Geometry of the new btree. */
+	struct xfs_btree_bload	bload;
+
+	/* Staging btree cursor for the new tree. */
+	struct xfs_btree_cur	*cur;
+
+	/* Tree-specific data. */
+	union {
+		struct xfs_slab_cursor	*slab_cursor;
+	};
+};
+
 /*
  * extra metadata for the agi
  */
@@ -306,6 +324,156 @@ _("error - not enough free space in filesystem\n"));
 #endif
 }
 
+/*
+ * Estimate proper slack values for a btree that's being reloaded.
+ *
+ * Under most circumstances, we'll take whatever default loading value the
+ * btree bulk loading code calculates for us.  However, there are some
+ * exceptions to this rule:
+ *
+ * (1) If someone turned one of the debug knobs.
+ * (2) The AG has less than ~9% space free.
+ *
+ * Note that we actually use 3/32 for the comparison to avoid division.
+ */
+static void
+estimate_ag_bload_slack(
+	struct repair_ctx	*sc,
+	struct xfs_btree_bload	*bload,
+	unsigned int		free)
+{
+	/*
+	 * The global values are set to -1 (i.e. take the bload defaults)
+	 * unless someone has set them otherwise, so we just pull the values
+	 * here.
+	 */
+	bload->leaf_slack = bload_leaf_slack;
+	bload->node_slack = bload_node_slack;
+
+	/* No further changes if there's more than 3/32ths space left. */
+	if (free >= ((sc->mp->m_sb.sb_agblocks * 3) >> 5))
+		return;
+
+	/* We're low on space; load the btrees as tightly as possible. */
+	if (bload->leaf_slack < 0)
+		bload->leaf_slack = 0;
+	if (bload->node_slack < 0)
+		bload->node_slack = 0;
+}
+
+/* Initialize a btree rebuild context. */
+static void
+init_rebuild(
+	struct repair_ctx		*sc,
+	const struct xfs_owner_info	*oinfo,
+	xfs_agblock_t			free_space,
+	struct bt_rebuild		*btr)
+{
+	memset(btr, 0, sizeof(struct bt_rebuild));
+
+	xrep_newbt_init_bare(&btr->newbt, sc);
+	btr->newbt.oinfo = *oinfo; /* struct copy */
+	estimate_ag_bload_slack(sc, &btr->bload, free_space);
+}
+
+/*
+ * Update this free space record to reflect the blocks we stole from the
+ * beginning of the record.
+ */
+static void
+consume_freespace(
+	xfs_agnumber_t		agno,
+	struct extent_tree_node	*ext_ptr,
+	uint32_t		len)
+{
+	struct extent_tree_node	*bno_ext_ptr;
+	xfs_agblock_t		new_start = ext_ptr->ex_startblock + len;
+	xfs_extlen_t		new_len = ext_ptr->ex_blockcount - len;
+
+	/* Delete the used-up extent from both extent trees. */
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "releasing extent: %u [%u %u]\n", agno,
+			ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
+#endif
+	bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
+	ASSERT(bno_ext_ptr != NULL);
+	get_bno_extent(agno, bno_ext_ptr);
+	release_extent_tree_node(bno_ext_ptr);
+
+	ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
+			ext_ptr->ex_blockcount);
+	release_extent_tree_node(ext_ptr);
+
+	/*
+	 * If we only used part of this last extent, then we must reinsert the
+	 * extent in the extent trees.
+	 */
+	if (new_len > 0) {
+		add_bno_extent(agno, new_start, new_len);
+		add_bcnt_extent(agno, new_start, new_len);
+	}
+}
+
+/* Reserve blocks for the new btree. */
+static void
+setup_rebuild(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr,
+	uint32_t		nr_blocks)
+{
+	struct extent_tree_node	*ext_ptr;
+	uint32_t		blocks_allocated = 0;
+	uint32_t		len;
+	int			error;
+
+	while (blocks_allocated < nr_blocks)  {
+		/*
+		 * Grab the smallest extent and use it up, then get the
+		 * next smallest.  This mimics the init_*_cursor code.
+		 */
+		ext_ptr =  findfirst_bcnt_extent(agno);
+		if (!ext_ptr)
+			do_error(
+_("error - not enough free space in filesystem\n"));
+
+		/* Use up the extent we've got. */
+		len = min(ext_ptr->ex_blockcount, nr_blocks - blocks_allocated);
+		error = xrep_newbt_add_blocks(&btr->newbt,
+				XFS_AGB_TO_FSB(mp, agno,
+					       ext_ptr->ex_startblock),
+				len);
+		if (error)
+			do_error(_("could not set up btree reservation: %s\n"),
+				strerror(-error));
+
+		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
+				btr->newbt.oinfo.oi_owner);
+		if (error)
+			do_error(_("could not set up btree rmaps: %s\n"),
+				strerror(-error));
+
+		consume_freespace(agno, ext_ptr, len);
+		blocks_allocated += len;
+	}
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "blocks_allocated = %d\n",
+		blocks_allocated);
+#endif
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+static int
+rebuild_claim_block(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	void			*priv)
+{
+	struct bt_rebuild	*btr = priv;
+
+	return xrep_newbt_claim_block(cur, &btr->newbt, ptr);
+}
+
 static void
 write_cursor(bt_status_t *curs)
 {
@@ -336,6 +504,34 @@ finish_cursor(bt_status_t *curs)
 	free(curs->btree_blocks);
 }
 
+/*
+ * Scoop up leftovers from a rebuild cursor for later freeing, then free the
+ * rebuild context.
+ */
+static void
+finish_rebuild(
+	struct xfs_mount	*mp,
+	struct bt_rebuild	*btr,
+	struct xfs_slab		*lost_fsb)
+{
+	struct xrep_newbt_resv	*resv, *n;
+
+	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
+		while (resv->used < resv->len) {
+			xfs_fsblock_t	fsb = resv->fsbno + resv->used;
+			int		error;
+
+			error = slab_add(lost_fsb, &fsb);
+			if (error)
+				do_error(
+_("Insufficient memory saving lost blocks.\n"));
+			resv->used++;
+		}
+	}
+
+	xrep_newbt_destroy(&btr->newbt, 0);
+}
+
 /*
  * We need to leave some free records in the tree for the corner case of
  * setting up the AGFL. This may require allocation of blocks, and as
@@ -2287,28 +2483,29 @@ keep_fsinos(xfs_mount_t *mp)
 
 static void
 phase5_func(
-	xfs_mount_t	*mp,
-	xfs_agnumber_t	agno,
-	struct xfs_slab	*lost_fsb)
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct xfs_slab		*lost_fsb)
 {
-	uint64_t	num_inos;
-	uint64_t	num_free_inos;
-	uint64_t	finobt_num_inos;
-	uint64_t	finobt_num_free_inos;
-	bt_status_t	bno_btree_curs;
-	bt_status_t	bcnt_btree_curs;
-	bt_status_t	ino_btree_curs;
-	bt_status_t	fino_btree_curs;
-	bt_status_t	rmap_btree_curs;
-	bt_status_t	refcnt_btree_curs;
-	int		extra_blocks = 0;
-	uint		num_freeblocks;
-	xfs_extlen_t	freeblks1;
+	struct repair_ctx	sc = { .mp = mp, };
+	struct agi_stat		agi_stat = {0,};
+	uint64_t		num_inos;
+	uint64_t		num_free_inos;
+	uint64_t		finobt_num_inos;
+	uint64_t		finobt_num_free_inos;
+	bt_status_t		bno_btree_curs;
+	bt_status_t		bcnt_btree_curs;
+	bt_status_t		ino_btree_curs;
+	bt_status_t		fino_btree_curs;
+	bt_status_t		rmap_btree_curs;
+	bt_status_t		refcnt_btree_curs;
+	int			extra_blocks = 0;
+	uint			num_freeblocks;
+	xfs_extlen_t		freeblks1;
 #ifdef DEBUG
-	xfs_extlen_t	freeblks2;
+	xfs_extlen_t		freeblks2;
 #endif
-	xfs_agblock_t	num_extents;
-	struct agi_stat	agi_stat = {0,};
+	xfs_agblock_t		num_extents;
 
 	if (verbose)
 		do_log(_("        - agno = %d\n"), agno);
@@ -2516,8 +2713,8 @@ inject_lost_blocks(
 		if (error)
 			goto out_cancel;
 
-		error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG,
-					    XFS_AG_RESV_NONE);
+		error = -libxfs_free_extent(tp, *fsb, 1,
+				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
 		if (error)
 			goto out_cancel;
 


^ permalink raw reply related	[flat|nested] 20+ messages in thread
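
An aside on the 3/32 threshold in estimate_ag_bload_slack() in the
patch above: (sb_agblocks * 3) >> 5 is exactly sb_agblocks * 3/32,
i.e. 9.375% of the AG, which is where the "~9%" in the comment comes
from.  A quick standalone check with an example AG size:

#include <stdio.h>

int main(void)
{
        unsigned int agblocks = 1048576;        /* example AG size, blocks */
        unsigned int thresh = (agblocks * 3) >> 5;

        /* Prints 98304 blocks, i.e. 9.375% of the AG. */
        printf("threshold = %u blocks (%.3f%%)\n", thresh,
                        100.0 * thresh / agblocks);
        return 0;
}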

* Re: [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors
  2020-05-14 15:11   ` Brian Foster
@ 2020-05-14 19:47     ` Darrick J. Wong
  0 siblings, 0 replies; 20+ messages in thread
From: Darrick J. Wong @ 2020-05-14 19:47 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Thu, May 14, 2020 at 11:11:07AM -0400, Brian Foster wrote:
> On Sat, May 09, 2020 at 09:31:59AM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Create some new support structures and functions to assist phase5 in
> > using the btree bulk loader to reconstruct metadata btrees.  This is the
> > first step in removing the open-coded rebuilding code.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> 
> FYI, unused variable warnings:
> 
> phase5.c: In function ‘phase5_func’:
> phase5.c:2491:20: warning: unused variable ‘sc’ [-Wunused-variable]
>  2491 |  struct repair_ctx sc = { .mp = mp, };
>       |                    ^~
> At top level:
> phase5.c:509:1: warning: ‘finish_rebuild’ defined but not used [-Wunused-function]
>   509 | finish_rebuild(
>       | ^~~~~~~~~~~~~~
> phase5.c:468:1: warning: ‘rebuild_alloc_block’ defined but not used [-Wunused-function]
>   468 | rebuild_alloc_block(
>       | ^~~~~~~~~~~~~~~~~~~
> phase5.c:381:1: warning: ‘setup_rebuild’ defined but not used [-Wunused-function]
>   381 | setup_rebuild(
>       | ^~~~~~~~~~~~~
> phase5.c:366:1: warning: ‘init_rebuild’ defined but not used [-Wunused-function]
>   366 | init_rebuild(
>       | ^~~~~~~~~~~~

Yeah... these aren't used by anything until the next patch, and I was
trying to keep the "infrastructure you need for X" changes separate from
"X".  Though every time I do that, people complain about me adding code
that isn't used anywhere...

> >  repair/phase5.c |  240 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 219 insertions(+), 21 deletions(-)
> > 
> > 
> > diff --git a/repair/phase5.c b/repair/phase5.c
> > index f3be15de..7eb24519 100644
> > --- a/repair/phase5.c
> > +++ b/repair/phase5.c
> ...
> > @@ -306,6 +324,157 @@ _("error - not enough free space in filesystem\n"));
> ...
> > +/* Reserve blocks for the new btree. */
> > +static void
> > +setup_rebuild(
> > +	struct xfs_mount	*mp,
> > +	xfs_agnumber_t		agno,
> > +	struct bt_rebuild	*btr,
> > +	uint32_t		nr_blocks)
> > +{
> > +	struct extent_tree_node	*ext_ptr;
> > +	struct extent_tree_node	*bno_ext_ptr;
> > +	uint32_t		blocks_allocated = 0;
> > +	int			error;
> > +
> > +	/*
> > +	 * grab the smallest extent and use it up, then get the
> > +	 * next smallest.  This mimics the init_*_cursor code.
> > +	 */
> > +	ext_ptr =  findfirst_bcnt_extent(agno);
> > +
> > +	/*
> > +	 * set up the free block array
> > +	 */
> > +	while (blocks_allocated < nr_blocks)  {
> > +		uint64_t	len;
> > +		xfs_agblock_t	new_start;
> > +		xfs_extlen_t	new_len;
> > +
> > +		if (!ext_ptr)
> > +			do_error(
> > +_("error - not enough free space in filesystem\n"));
> > +
> > +		/* Use up the extent we've got. */
> > +		len = min(ext_ptr->ex_blockcount,
> > +				btr->bload.nr_blocks - blocks_allocated);
> 
> What's the difference between the nr_blocks parameter and this one?

I think that's a bug, and should have been:

	len = min(ext_ptr->ex_blockcount, nr_blocks - blocks_allocated);
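
A worked illustration of the difference (hypothetical numbers, not
taken from the series), showing how the old bound can overshoot the
caller's request:

#include <stdio.h>

#define MIN(a, b)       ((a) < (b) ? (a) : (b))

int main(void)
{
        unsigned int nr_blocks = 40;            /* this call's request */
        unsigned int bload_nr_blocks = 100;     /* btr->bload.nr_blocks */
        unsigned int blocks_allocated = 30;
        unsigned int ex_blockcount = 50;

        /* Old bound: min(50, 100 - 30) = 50, overshooting the request. */
        unsigned int buggy = MIN(ex_blockcount,
                        bload_nr_blocks - blocks_allocated);
        /* Fixed bound: min(50, 40 - 30) = 10, stopping at nr_blocks. */
        unsigned int fixed = MIN(ex_blockcount,
                        nr_blocks - blocks_allocated);

        printf("buggy len = %u, fixed len = %u\n", buggy, fixed);
        return 0;
}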

> > +		error = xrep_newbt_add_reservation(&btr->newbt,
> > +				XFS_AGB_TO_FSB(mp, agno,
> > +					       ext_ptr->ex_startblock),
> > +				len, NULL);
> > +		if (error)
> > +			do_error(_("could not set up btree reservation: %s\n"),
> > +				strerror(-error));
> > +		blocks_allocated += len;
> > +
> > +		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
> > +				btr->newbt.oinfo.oi_owner);
> > +		if (error)
> > +			do_error(_("could not set up btree rmaps: %s\n"),
> > +				strerror(-error));
> > +
> > +		/* Figure out if we're putting anything back. */
> 
> The remaining extent replacement bits of this loop look like they
> could warrant a little helper and a comment to explain exactly what's
> happening at a high level.

Ok, I'll figure something out.

--D

> Brian
> 
> > +		new_start = ext_ptr->ex_startblock + len;
> > +		new_len = ext_ptr->ex_blockcount - len;
> > +
> > +		/* Delete the used-up extent from both extent trees. */
> > +#ifdef XR_BLD_FREE_TRACE
> > +		fprintf(stderr, "releasing extent: %u [%u %u]\n",
> > +			agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
> > +#endif
> > +		bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
> > +		ASSERT(bno_ext_ptr != NULL);
> > +		get_bno_extent(agno, bno_ext_ptr);
> > +		release_extent_tree_node(bno_ext_ptr);
> > +
> > +		ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
> > +				ext_ptr->ex_blockcount);
> > +		ASSERT(ext_ptr != NULL);
> > +		release_extent_tree_node(ext_ptr);
> > +
> > +		/*
> > +		 * If we only used part of this last extent, then we need only
> > +		 * to reinsert the extent in the extent trees and we're done.
> > +		 */
> > +		if (new_len > 0) {
> > +			add_bno_extent(agno, new_start, new_len);
> > +			add_bcnt_extent(agno, new_start, new_len);
> > +			break;
> > +		}
> > +
> > +		/* Otherwise, find the next biggest extent. */
> > +		ext_ptr = findfirst_bcnt_extent(agno);
> > +	}
> > +#ifdef XR_BLD_FREE_TRACE
> > +	fprintf(stderr, "blocks_allocated = %d\n",
> > +		blocks_allocated);
> > +#endif
> > +}
> > +
> > +/* Feed one of the new btree blocks to the bulk loader. */
> > +static int
> > +rebuild_alloc_block(
> > +	struct xfs_btree_cur	*cur,
> > +	union xfs_btree_ptr	*ptr,
> > +	void			*priv)
> > +{
> > +	struct bt_rebuild	*btr = priv;
> > +
> > +	return xrep_newbt_claim_block(cur, &btr->newbt, ptr);
> > +}
> > +
> >  static void
> >  write_cursor(bt_status_t *curs)
> >  {
> > @@ -336,6 +505,34 @@ finish_cursor(bt_status_t *curs)
> >  	free(curs->btree_blocks);
> >  }
> >  
> > +static void
> > +finish_rebuild(
> > +	struct xfs_mount	*mp,
> > +	struct bt_rebuild	*btr)
> > +{
> > +	struct xrep_newbt_resv	*resv, *n;
> > +
> > +	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
> > +		xfs_agnumber_t	agno;
> > +		xfs_agblock_t	bno;
> > +		xfs_extlen_t	len;
> > +
> > +		if (resv->used >= resv->len)
> > +			continue;
> > +
> > +		/* XXX: Shouldn't this go on the AGFL? */
> > +		/* Put back everything we didn't use. */
> > +		bno = XFS_FSB_TO_AGBNO(mp, resv->fsbno + resv->used);
> > +		agno = XFS_FSB_TO_AGNO(mp, resv->fsbno + resv->used);
> > +		len = resv->len - resv->used;
> > +
> > +		add_bno_extent(agno, bno, len);
> > +		add_bcnt_extent(agno, bno, len);
> > +	}
> > +
> > +	xrep_newbt_destroy(&btr->newbt, 0);
> > +}
> > +
> >  /*
> >   * We need to leave some free records in the tree for the corner case of
> >   * setting up the AGFL. This may require allocation of blocks, and as
> > @@ -2290,28 +2487,29 @@ keep_fsinos(xfs_mount_t *mp)
> >  
> >  static void
> >  phase5_func(
> > -	xfs_mount_t	*mp,
> > -	xfs_agnumber_t	agno,
> > -	struct xfs_slab	*lost_fsb)
> > +	struct xfs_mount	*mp,
> > +	xfs_agnumber_t		agno,
> > +	struct xfs_slab		*lost_fsb)
> >  {
> > -	uint64_t	num_inos;
> > -	uint64_t	num_free_inos;
> > -	uint64_t	finobt_num_inos;
> > -	uint64_t	finobt_num_free_inos;
> > -	bt_status_t	bno_btree_curs;
> > -	bt_status_t	bcnt_btree_curs;
> > -	bt_status_t	ino_btree_curs;
> > -	bt_status_t	fino_btree_curs;
> > -	bt_status_t	rmap_btree_curs;
> > -	bt_status_t	refcnt_btree_curs;
> > -	int		extra_blocks = 0;
> > -	uint		num_freeblocks;
> > -	xfs_extlen_t	freeblks1;
> > +	struct repair_ctx	sc = { .mp = mp, };
> > +	struct agi_stat		agi_stat = {0,};
> > +	uint64_t		num_inos;
> > +	uint64_t		num_free_inos;
> > +	uint64_t		finobt_num_inos;
> > +	uint64_t		finobt_num_free_inos;
> > +	bt_status_t		bno_btree_curs;
> > +	bt_status_t		bcnt_btree_curs;
> > +	bt_status_t		ino_btree_curs;
> > +	bt_status_t		fino_btree_curs;
> > +	bt_status_t		rmap_btree_curs;
> > +	bt_status_t		refcnt_btree_curs;
> > +	int			extra_blocks = 0;
> > +	uint			num_freeblocks;
> > +	xfs_extlen_t		freeblks1;
> >  #ifdef DEBUG
> > -	xfs_extlen_t	freeblks2;
> > +	xfs_extlen_t		freeblks2;
> >  #endif
> > -	xfs_agblock_t	num_extents;
> > -	struct agi_stat	agi_stat = {0,};
> > +	xfs_agblock_t		num_extents;
> >  
> >  	if (verbose)
> >  		do_log(_("        - agno = %d\n"), agno);
> > @@ -2533,8 +2731,8 @@ inject_lost_blocks(
> >  		if (error)
> >  			goto out_cancel;
> >  
> > -		error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG,
> > -					    XFS_AG_RESV_NONE);
> > +		error = -libxfs_free_extent(tp, *fsb, 1,
> > +				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
> >  		if (error)
> >  			goto out_cancel;
> >  
> > 
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors
  2020-05-09 16:31 ` [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors Darrick J. Wong
@ 2020-05-14 15:11   ` Brian Foster
  2020-05-14 19:47     ` Darrick J. Wong
  0 siblings, 1 reply; 20+ messages in thread
From: Brian Foster @ 2020-05-14 15:11 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Sat, May 09, 2020 at 09:31:59AM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Create some new support structures and functions to assist phase5 in
> using the btree bulk loader to reconstruct metadata btrees.  This is the
> first step in removing the open-coded rebuilding code.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---

FYI, unused variable warnings:

phase5.c: In function ‘phase5_func’:
phase5.c:2491:20: warning: unused variable ‘sc’ [-Wunused-variable]
 2491 |  struct repair_ctx sc = { .mp = mp, };
      |                    ^~
At top level:
phase5.c:509:1: warning: ‘finish_rebuild’ defined but not used [-Wunused-function]
  509 | finish_rebuild(
      | ^~~~~~~~~~~~~~
phase5.c:468:1: warning: ‘rebuild_alloc_block’ defined but not used [-Wunused-function]
  468 | rebuild_alloc_block(
      | ^~~~~~~~~~~~~~~~~~~
phase5.c:381:1: warning: ‘setup_rebuild’ defined but not used [-Wunused-function]
  381 | setup_rebuild(
      | ^~~~~~~~~~~~~
phase5.c:366:1: warning: ‘init_rebuild’ defined but not used [-Wunused-function]
  366 | init_rebuild(
      | ^~~~~~~~~~~~

>  repair/phase5.c |  240 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 219 insertions(+), 21 deletions(-)
> 
> 
> diff --git a/repair/phase5.c b/repair/phase5.c
> index f3be15de..7eb24519 100644
> --- a/repair/phase5.c
> +++ b/repair/phase5.c
...
> @@ -306,6 +324,157 @@ _("error - not enough free space in filesystem\n"));
...
> +/* Reserve blocks for the new btree. */
> +static void
> +setup_rebuild(
> +	struct xfs_mount	*mp,
> +	xfs_agnumber_t		agno,
> +	struct bt_rebuild	*btr,
> +	uint32_t		nr_blocks)
> +{
> +	struct extent_tree_node	*ext_ptr;
> +	struct extent_tree_node	*bno_ext_ptr;
> +	uint32_t		blocks_allocated = 0;
> +	int			error;
> +
> +	/*
> +	 * grab the smallest extent and use it up, then get the
> +	 * next smallest.  This mimics the init_*_cursor code.
> +	 */
> +	ext_ptr =  findfirst_bcnt_extent(agno);
> +
> +	/*
> +	 * set up the free block array
> +	 */
> +	while (blocks_allocated < nr_blocks)  {
> +		uint64_t	len;
> +		xfs_agblock_t	new_start;
> +		xfs_extlen_t	new_len;
> +
> +		if (!ext_ptr)
> +			do_error(
> +_("error - not enough free space in filesystem\n"));
> +
> +		/* Use up the extent we've got. */
> +		len = min(ext_ptr->ex_blockcount,
> +				btr->bload.nr_blocks - blocks_allocated);

What's the difference between the nr_blocks parameter and this one?

> +		error = xrep_newbt_add_reservation(&btr->newbt,
> +				XFS_AGB_TO_FSB(mp, agno,
> +					       ext_ptr->ex_startblock),
> +				len, NULL);
> +		if (error)
> +			do_error(_("could not set up btree reservation: %s\n"),
> +				strerror(-error));
> +		blocks_allocated += len;
> +
> +		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
> +				btr->newbt.oinfo.oi_owner);
> +		if (error)
> +			do_error(_("could not set up btree rmaps: %s\n"),
> +				strerror(-error));
> +
> +		/* Figure out if we're putting anything back. */

The remaining extent replacement bits of this loop look like they could
warrant a little helper and a comment to explain exactly what's
happening at a high level.

Brian

> +		new_start = ext_ptr->ex_startblock + len;
> +		new_len = ext_ptr->ex_blockcount - len;
> +
> +		/* Delete the used-up extent from both extent trees. */
> +#ifdef XR_BLD_FREE_TRACE
> +		fprintf(stderr, "releasing extent: %u [%u %u]\n",
> +			agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
> +#endif
> +		bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
> +		ASSERT(bno_ext_ptr != NULL);
> +		get_bno_extent(agno, bno_ext_ptr);
> +		release_extent_tree_node(bno_ext_ptr);
> +
> +		ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
> +				ext_ptr->ex_blockcount);
> +		ASSERT(ext_ptr != NULL);
> +		release_extent_tree_node(ext_ptr);
> +
> +		/*
> +		 * If we only used part of this last extent, then we need only
> +		 * to reinsert the extent in the extent trees and we're done.
> +		 */
> +		if (new_len > 0) {
> +			add_bno_extent(agno, new_start, new_len);
> +			add_bcnt_extent(agno, new_start, new_len);
> +			break;
> +		}
> +
> +		/* Otherwise, find the next biggest extent. */
> +		ext_ptr = findfirst_bcnt_extent(agno);
> +	}
> +#ifdef XR_BLD_FREE_TRACE
> +	fprintf(stderr, "blocks_allocated = %d\n",
> +		blocks_allocated);
> +#endif
> +}
> +
> +/* Feed one of the new btree blocks to the bulk loader. */
> +static int
> +rebuild_alloc_block(
> +	struct xfs_btree_cur	*cur,
> +	union xfs_btree_ptr	*ptr,
> +	void			*priv)
> +{
> +	struct bt_rebuild	*btr = priv;
> +
> +	return xrep_newbt_claim_block(cur, &btr->newbt, ptr);
> +}
> +
>  static void
>  write_cursor(bt_status_t *curs)
>  {
> @@ -336,6 +505,34 @@ finish_cursor(bt_status_t *curs)
>  	free(curs->btree_blocks);
>  }
>  
> +static void
> +finish_rebuild(
> +	struct xfs_mount	*mp,
> +	struct bt_rebuild	*btr)
> +{
> +	struct xrep_newbt_resv	*resv, *n;
> +
> +	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
> +		xfs_agnumber_t	agno;
> +		xfs_agblock_t	bno;
> +		xfs_extlen_t	len;
> +
> +		if (resv->used >= resv->len)
> +			continue;
> +
> +		/* XXX: Shouldn't this go on the AGFL? */
> +		/* Put back everything we didn't use. */
> +		bno = XFS_FSB_TO_AGBNO(mp, resv->fsbno + resv->used);
> +		agno = XFS_FSB_TO_AGNO(mp, resv->fsbno + resv->used);
> +		len = resv->len - resv->used;
> +
> +		add_bno_extent(agno, bno, len);
> +		add_bcnt_extent(agno, bno, len);
> +	}
> +
> +	xrep_newbt_destroy(&btr->newbt, 0);
> +}
> +
>  /*
>   * We need to leave some free records in the tree for the corner case of
>   * setting up the AGFL. This may require allocation of blocks, and as
> @@ -2290,28 +2487,29 @@ keep_fsinos(xfs_mount_t *mp)
>  
>  static void
>  phase5_func(
> -	xfs_mount_t	*mp,
> -	xfs_agnumber_t	agno,
> -	struct xfs_slab	*lost_fsb)
> +	struct xfs_mount	*mp,
> +	xfs_agnumber_t		agno,
> +	struct xfs_slab		*lost_fsb)
>  {
> -	uint64_t	num_inos;
> -	uint64_t	num_free_inos;
> -	uint64_t	finobt_num_inos;
> -	uint64_t	finobt_num_free_inos;
> -	bt_status_t	bno_btree_curs;
> -	bt_status_t	bcnt_btree_curs;
> -	bt_status_t	ino_btree_curs;
> -	bt_status_t	fino_btree_curs;
> -	bt_status_t	rmap_btree_curs;
> -	bt_status_t	refcnt_btree_curs;
> -	int		extra_blocks = 0;
> -	uint		num_freeblocks;
> -	xfs_extlen_t	freeblks1;
> +	struct repair_ctx	sc = { .mp = mp, };
> +	struct agi_stat		agi_stat = {0,};
> +	uint64_t		num_inos;
> +	uint64_t		num_free_inos;
> +	uint64_t		finobt_num_inos;
> +	uint64_t		finobt_num_free_inos;
> +	bt_status_t		bno_btree_curs;
> +	bt_status_t		bcnt_btree_curs;
> +	bt_status_t		ino_btree_curs;
> +	bt_status_t		fino_btree_curs;
> +	bt_status_t		rmap_btree_curs;
> +	bt_status_t		refcnt_btree_curs;
> +	int			extra_blocks = 0;
> +	uint			num_freeblocks;
> +	xfs_extlen_t		freeblks1;
>  #ifdef DEBUG
> -	xfs_extlen_t	freeblks2;
> +	xfs_extlen_t		freeblks2;
>  #endif
> -	xfs_agblock_t	num_extents;
> -	struct agi_stat	agi_stat = {0,};
> +	xfs_agblock_t		num_extents;
>  
>  	if (verbose)
>  		do_log(_("        - agno = %d\n"), agno);
> @@ -2533,8 +2731,8 @@ inject_lost_blocks(
>  		if (error)
>  			goto out_cancel;
>  
> -		error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG,
> -					    XFS_AG_RESV_NONE);
> +		error = -libxfs_free_extent(tp, *fsb, 1,
> +				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
>  		if (error)
>  			goto out_cancel;
>  
> 


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors
  2020-05-09 16:31 [PATCH v4 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
@ 2020-05-09 16:31 ` Darrick J. Wong
  2020-05-14 15:11   ` Brian Foster
  0 siblings, 1 reply; 20+ messages in thread
From: Darrick J. Wong @ 2020-05-09 16:31 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <darrick.wong@oracle.com>

Create some new support structures and functions to assist phase5 in
using the btree bulk loader to reconstruct metadata btrees.  This is the
first step in removing the open-coded rebuilding code.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |  240 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 219 insertions(+), 21 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index f3be15de..7eb24519 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -18,6 +18,7 @@
 #include "progress.h"
 #include "slab.h"
 #include "rmap.h"
+#include "bload.h"
 
 /*
  * we maintain the current slice (path from root to leaf)
@@ -65,6 +66,23 @@ typedef struct bt_status  {
 	uint64_t		owner;		/* owner */
 } bt_status_t;
 
+/* Context for rebuilding a per-AG btree. */
+struct bt_rebuild {
+	/* Fake root for staging and space preallocations. */
+	struct xrep_newbt	newbt;
+
+	/* Geometry of the new btree. */
+	struct xfs_btree_bload	bload;
+
+	/* Staging btree cursor for the new tree. */
+	struct xfs_btree_cur	*cur;
+
+	/* Tree-specific data. */
+	union {
+		struct xfs_slab_cursor	*slab_cursor;
+	};
+};
+
 /*
  * extra metadata for the agi
  */
@@ -306,6 +324,157 @@ _("error - not enough free space in filesystem\n"));
 #endif
 }
 
+/*
+ * Estimate proper slack values for a btree that's being reloaded.
+ *
+ * Under most circumstances, we'll take whatever default loading value the
+ * btree bulk loading code calculates for us.  However, there are some
+ * exceptions to this rule:
+ *
+ * (1) If someone turned one of the debug knobs.
+ * (2) The AG has less than ~9% space free.
+ *
+ * Note that we actually use 3/32 for the comparison to avoid division.
+ */
+static void
+estimate_ag_bload_slack(
+	struct repair_ctx	*sc,
+	struct xfs_btree_bload	*bload,
+	unsigned int		free)
+{
+	/*
+	 * The global values are set to -1 (i.e. take the bload defaults)
+	 * unless someone has set them otherwise, so we just pull the values
+	 * here.
+	 */
+	bload->leaf_slack = bload_leaf_slack;
+	bload->node_slack = bload_node_slack;
+
+	/* No further changes if there's more than 3/32ths space left. */
+	if (free >= ((sc->mp->m_sb.sb_agblocks * 3) >> 5))
+		return;
+
+	/* We're low on space; load the btrees as tightly as possible. */
+	if (bload->leaf_slack < 0)
+		bload->leaf_slack = 0;
+	if (bload->node_slack < 0)
+		bload->node_slack = 0;
+}
+
+/* Initialize a btree rebuild context. */
+static void
+init_rebuild(
+	struct repair_ctx		*sc,
+	const struct xfs_owner_info	*oinfo,
+	xfs_agblock_t			free_space,
+	struct bt_rebuild		*btr)
+{
+	memset(btr, 0, sizeof(struct bt_rebuild));
+
+	xrep_newbt_init_bare(&btr->newbt, sc);
+	btr->newbt.oinfo = *oinfo; /* struct copy */
+	estimate_ag_bload_slack(sc, &btr->bload, free_space);
+}
+
+/* Reserve blocks for the new btree. */
+static void
+setup_rebuild(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr,
+	uint32_t		nr_blocks)
+{
+	struct extent_tree_node	*ext_ptr;
+	struct extent_tree_node	*bno_ext_ptr;
+	uint32_t		blocks_allocated = 0;
+	int			error;
+
+	/*
+	 * grab the smallest extent and use it up, then get the
+	 * next smallest.  This mimics the init_*_cursor code.
+	 */
+	ext_ptr =  findfirst_bcnt_extent(agno);
+
+	/*
+	 * set up the free block array
+	 */
+	while (blocks_allocated < nr_blocks)  {
+		uint64_t	len;
+		xfs_agblock_t	new_start;
+		xfs_extlen_t	new_len;
+
+		if (!ext_ptr)
+			do_error(
+_("error - not enough free space in filesystem\n"));
+
+		/* Use up the extent we've got. */
+		len = min(ext_ptr->ex_blockcount,
+				btr->bload.nr_blocks - blocks_allocated);
+		error = xrep_newbt_add_reservation(&btr->newbt,
+				XFS_AGB_TO_FSB(mp, agno,
+					       ext_ptr->ex_startblock),
+				len, NULL);
+		if (error)
+			do_error(_("could not set up btree reservation: %s\n"),
+				strerror(-error));
+		blocks_allocated += len;
+
+		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
+				btr->newbt.oinfo.oi_owner);
+		if (error)
+			do_error(_("could not set up btree rmaps: %s\n"),
+				strerror(-error));
+
+		/* Figure out if we're putting anything back. */
+		new_start = ext_ptr->ex_startblock + len;
+		new_len = ext_ptr->ex_blockcount - len;
+
+		/* Delete the used-up extent from both extent trees. */
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, "releasing extent: %u [%u %u]\n",
+			agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
+#endif
+		bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
+		ASSERT(bno_ext_ptr != NULL);
+		get_bno_extent(agno, bno_ext_ptr);
+		release_extent_tree_node(bno_ext_ptr);
+
+		ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
+				ext_ptr->ex_blockcount);
+		ASSERT(ext_ptr != NULL);
+		release_extent_tree_node(ext_ptr);
+
+		/*
+		 * If we only used part of this last extent, then we need only
+		 * to reinsert the extent in the extent trees and we're done.
+		 */
+		if (new_len > 0) {
+			add_bno_extent(agno, new_start, new_len);
+			add_bcnt_extent(agno, new_start, new_len);
+			break;
+		}
+
+		/* Otherwise, find the next biggest extent. */
+		ext_ptr = findfirst_bcnt_extent(agno);
+	}
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "blocks_allocated = %d\n",
+		blocks_allocated);
+#endif
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+static int
+rebuild_alloc_block(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	void			*priv)
+{
+	struct bt_rebuild	*btr = priv;
+
+	return xrep_newbt_claim_block(cur, &btr->newbt, ptr);
+}
+
 static void
 write_cursor(bt_status_t *curs)
 {
@@ -336,6 +505,34 @@ finish_cursor(bt_status_t *curs)
 	free(curs->btree_blocks);
 }
 
+static void
+finish_rebuild(
+	struct xfs_mount	*mp,
+	struct bt_rebuild	*btr)
+{
+	struct xrep_newbt_resv	*resv, *n;
+
+	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
+		xfs_agnumber_t	agno;
+		xfs_agblock_t	bno;
+		xfs_extlen_t	len;
+
+		if (resv->used >= resv->len)
+			continue;
+
+		/* XXX: Shouldn't this go on the AGFL? */
+		/* Put back everything we didn't use. */
+		bno = XFS_FSB_TO_AGBNO(mp, resv->fsbno + resv->used);
+		agno = XFS_FSB_TO_AGNO(mp, resv->fsbno + resv->used);
+		len = resv->len - resv->used;
+
+		add_bno_extent(agno, bno, len);
+		add_bcnt_extent(agno, bno, len);
+	}
+
+	xrep_newbt_destroy(&btr->newbt, 0);
+}
+
 /*
  * We need to leave some free records in the tree for the corner case of
  * setting up the AGFL. This may require allocation of blocks, and as
@@ -2290,28 +2487,29 @@ keep_fsinos(xfs_mount_t *mp)
 
 static void
 phase5_func(
-	xfs_mount_t	*mp,
-	xfs_agnumber_t	agno,
-	struct xfs_slab	*lost_fsb)
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct xfs_slab		*lost_fsb)
 {
-	uint64_t	num_inos;
-	uint64_t	num_free_inos;
-	uint64_t	finobt_num_inos;
-	uint64_t	finobt_num_free_inos;
-	bt_status_t	bno_btree_curs;
-	bt_status_t	bcnt_btree_curs;
-	bt_status_t	ino_btree_curs;
-	bt_status_t	fino_btree_curs;
-	bt_status_t	rmap_btree_curs;
-	bt_status_t	refcnt_btree_curs;
-	int		extra_blocks = 0;
-	uint		num_freeblocks;
-	xfs_extlen_t	freeblks1;
+	struct repair_ctx	sc = { .mp = mp, };
+	struct agi_stat		agi_stat = {0,};
+	uint64_t		num_inos;
+	uint64_t		num_free_inos;
+	uint64_t		finobt_num_inos;
+	uint64_t		finobt_num_free_inos;
+	bt_status_t		bno_btree_curs;
+	bt_status_t		bcnt_btree_curs;
+	bt_status_t		ino_btree_curs;
+	bt_status_t		fino_btree_curs;
+	bt_status_t		rmap_btree_curs;
+	bt_status_t		refcnt_btree_curs;
+	int			extra_blocks = 0;
+	uint			num_freeblocks;
+	xfs_extlen_t		freeblks1;
 #ifdef DEBUG
-	xfs_extlen_t	freeblks2;
+	xfs_extlen_t		freeblks2;
 #endif
-	xfs_agblock_t	num_extents;
-	struct agi_stat	agi_stat = {0,};
+	xfs_agblock_t		num_extents;
 
 	if (verbose)
 		do_log(_("        - agno = %d\n"), agno);
@@ -2533,8 +2731,8 @@ inject_lost_blocks(
 		if (error)
 			goto out_cancel;
 
-		error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG,
-					    XFS_AG_RESV_NONE);
+		error = -libxfs_free_extent(tp, *fsb, 1,
+				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
 		if (error)
 			goto out_cancel;
 


^ permalink raw reply related	[flat|nested] 20+ messages in thread
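
Comparing finish_rebuild() in this v4 posting with the v5 version
earlier in this archive: v4 returns unused reservation tails straight
to the in-core extent trees, while v5 stashes them one block at a time
in the lost_fsb slab so inject_lost_blocks() can free them through the
normal transaction path.  A minimal sketch of the leftover arithmetic
both versions share (toy types, not the real xrep_newbt_resv):

#include <stdio.h>

struct toy_resv { unsigned long fsbno; unsigned int len, used; };

int main(void)
{
        struct toy_resv resv[] = { { 1000, 16, 16 }, { 2000, 8, 5 } };
        int i;

        for (i = 0; i < 2; i++) {
                if (resv[i].used >= resv[i].len)
                        continue;
                /* The unused tail starts right after the claimed blocks. */
                printf("leftover: fsbno %lu, len %u\n",
                                resv[i].fsbno + resv[i].used,
                                resv[i].len - resv[i].used);
        }
        return 0;
}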

* [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors
  2020-03-04  3:29 [PATCH v3 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
@ 2020-03-04  3:29 ` Darrick J. Wong
  0 siblings, 0 replies; 20+ messages in thread
From: Darrick J. Wong @ 2020-03-04  3:29 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <darrick.wong@oracle.com>

Create some new support structures and functions to assist phase5 in
using the btree bulk loader to reconstruct metadata btrees.  This is the
first step in removing the open-coded rebuilding code.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |  231 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 210 insertions(+), 21 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index 3cc3f238..d5de2392 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -18,6 +18,7 @@
 #include "progress.h"
 #include "slab.h"
 #include "rmap.h"
+#include "bload.h"
 
 /*
  * we maintain the current slice (path from root to leaf)
@@ -65,6 +66,14 @@ typedef struct bt_status  {
 	uint64_t		owner;		/* owner */
 } bt_status_t;
 
+struct bt_rebuild {
+	struct xrep_newbt	newbt;
+	struct xfs_btree_bload	bload;
+	union {
+		struct xfs_slab_cursor	*slab_cursor;
+	};
+};
+
 /*
  * extra metadata for the agi
  */
@@ -306,6 +315,157 @@ _("error - not enough free space in filesystem\n"));
 #endif
 }
 
+/*
+ * Estimate proper slack values for a btree that's being reloaded.
+ *
+ * Under most circumstances, we'll take whatever default loading value the
+ * btree bulk loading code calculates for us.  However, there are some
+ * exceptions to this rule:
+ *
+ * (1) If someone turned one of the debug knobs.
+ * (2) The AG has less than ~9% space free.
+ *
+ * Note that we actually use 3/32 for the comparison to avoid division.
+ */
+static void
+estimate_ag_bload_slack(
+	struct repair_ctx	*sc,
+	struct xfs_btree_bload	*bload,
+	unsigned int		free)
+{
+	/*
+	 * The global values are set to -1 (i.e. take the bload defaults)
+	 * unless someone has set them otherwise, so we just pull the values
+	 * here.
+	 */
+	bload->leaf_slack = bload_leaf_slack;
+	bload->node_slack = bload_node_slack;
+
+	/* No further changes if there's more than 3/32ths space left. */
+	if (free >= ((sc->mp->m_sb.sb_agblocks * 3) >> 5))
+		return;
+
+	/* We're low on space; load the btrees as tightly as possible. */
+	if (bload->leaf_slack < 0)
+		bload->leaf_slack = 0;
+	if (bload->node_slack < 0)
+		bload->node_slack = 0;
+}
+
+/* Initialize a btree rebuild context. */
+static void
+init_rebuild(
+	struct repair_ctx		*sc,
+	const struct xfs_owner_info	*oinfo,
+	xfs_agblock_t			free_space,
+	struct bt_rebuild		*btr)
+{
+	memset(btr, 0, sizeof(struct bt_rebuild));
+
+	xrep_newbt_init_bare(&btr->newbt, sc);
+	btr->newbt.oinfo = *oinfo; /* struct copy */
+	estimate_ag_bload_slack(sc, &btr->bload, free_space);
+}
+
+/* Reserve blocks for the new btree. */
+static void
+setup_rebuild(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr,
+	uint32_t		nr_blocks)
+{
+	struct extent_tree_node	*ext_ptr;
+	struct extent_tree_node	*bno_ext_ptr;
+	uint32_t		blocks_allocated = 0;
+	int			error;
+
+	/*
+	 * grab the smallest extent and use it up, then get the
+	 * next smallest.  This mimics the init_*_cursor code.
+	 */
+	ext_ptr =  findfirst_bcnt_extent(agno);
+
+	/*
+	 * set up the free block array
+	 */
+	while (blocks_allocated < nr_blocks)  {
+		uint64_t	len;
+		xfs_agblock_t	new_start;
+		xfs_extlen_t	new_len;
+
+		if (!ext_ptr)
+			do_error(
+_("error - not enough free space in filesystem\n"));
+
+		/* Use up the extent we've got. */
+		len = min(ext_ptr->ex_blockcount,
+				btr->bload.nr_blocks - blocks_allocated);
+		error = xrep_newbt_add_reservation(&btr->newbt,
+				XFS_AGB_TO_FSB(mp, agno,
+					       ext_ptr->ex_startblock),
+				len, NULL);
+		if (error)
+			do_error(_("could not set up btree reservation: %s\n"),
+				strerror(-error));
+		blocks_allocated += len;
+
+		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
+				btr->newbt.oinfo.oi_owner);
+		if (error)
+			do_error(_("could not set up btree rmaps: %s\n"),
+				strerror(-error));
+
+		/* Figure out if we're putting anything back. */
+		new_start = ext_ptr->ex_startblock + len;
+		new_len = ext_ptr->ex_blockcount - len;
+
+		/* Delete the used-up extent from both extent trees. */
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, "releasing extent: %u [%u %u]\n",
+			agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
+#endif
+		bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
+		ASSERT(bno_ext_ptr != NULL);
+		get_bno_extent(agno, bno_ext_ptr);
+		release_extent_tree_node(bno_ext_ptr);
+
+		ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
+				ext_ptr->ex_blockcount);
+		ASSERT(ext_ptr != NULL);
+		release_extent_tree_node(ext_ptr);
+
+		/*
+		 * If we only used part of this last extent, then we need only
+		 * to reinsert the extent in the extent trees and we're done.
+		 */
+		if (new_len > 0) {
+			add_bno_extent(agno, new_start, new_len);
+			add_bcnt_extent(agno, new_start, new_len);
+			break;
+		}
+
+		/* Otherwise, find the next biggest extent. */
+		ext_ptr = findfirst_bcnt_extent(agno);
+	}
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "blocks_allocated = %d\n",
+		blocks_allocated);
+#endif
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+static int
+rebuild_alloc_block(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	void			*priv)
+{
+	struct bt_rebuild	*btr = priv;
+
+	return xrep_newbt_alloc_block(cur, &btr->newbt, ptr);
+}
+
 static void
 write_cursor(bt_status_t *curs)
 {
@@ -336,6 +496,34 @@ finish_cursor(bt_status_t *curs)
 	free(curs->btree_blocks);
 }
 
+static void
+finish_rebuild(
+	struct xfs_mount	*mp,
+	struct bt_rebuild	*btr)
+{
+	struct xrep_newbt_resv	*resv, *n;
+
+	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
+		xfs_agnumber_t	agno;
+		xfs_agblock_t	bno;
+		xfs_extlen_t	len;
+
+		if (resv->used >= resv->len)
+			continue;
+
+		/* XXX: Shouldn't this go on the AGFL? */
+		/* Put back everything we didn't use. */
+		bno = XFS_FSB_TO_AGBNO(mp, resv->fsbno + resv->used);
+		agno = XFS_FSB_TO_AGNO(mp, resv->fsbno + resv->used);
+		len = resv->len - resv->used;
+
+		add_bno_extent(agno, bno, len);
+		add_bcnt_extent(agno, bno, len);
+	}
+
+	xrep_newbt_destroy(&btr->newbt, 0);
+}
+
 /*
  * We need to leave some free records in the tree for the corner case of
  * setting up the AGFL. This may require allocation of blocks, and as
@@ -2287,28 +2475,29 @@ keep_fsinos(xfs_mount_t *mp)
 
 static void
 phase5_func(
-	xfs_mount_t	*mp,
-	xfs_agnumber_t	agno,
-	struct xfs_slab	*lost_fsb)
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct xfs_slab		*lost_fsb)
 {
-	uint64_t	num_inos;
-	uint64_t	num_free_inos;
-	uint64_t	finobt_num_inos;
-	uint64_t	finobt_num_free_inos;
-	bt_status_t	bno_btree_curs;
-	bt_status_t	bcnt_btree_curs;
-	bt_status_t	ino_btree_curs;
-	bt_status_t	fino_btree_curs;
-	bt_status_t	rmap_btree_curs;
-	bt_status_t	refcnt_btree_curs;
-	int		extra_blocks = 0;
-	uint		num_freeblocks;
-	xfs_extlen_t	freeblks1;
+	struct repair_ctx	sc = { .mp = mp, };
+	struct agi_stat		agi_stat = {0,};
+	uint64_t		num_inos;
+	uint64_t		num_free_inos;
+	uint64_t		finobt_num_inos;
+	uint64_t		finobt_num_free_inos;
+	bt_status_t		bno_btree_curs;
+	bt_status_t		bcnt_btree_curs;
+	bt_status_t		ino_btree_curs;
+	bt_status_t		fino_btree_curs;
+	bt_status_t		rmap_btree_curs;
+	bt_status_t		refcnt_btree_curs;
+	int			extra_blocks = 0;
+	uint			num_freeblocks;
+	xfs_extlen_t		freeblks1;
 #ifdef DEBUG
-	xfs_extlen_t	freeblks2;
+	xfs_extlen_t		freeblks2;
 #endif
-	xfs_agblock_t	num_extents;
-	struct agi_stat	agi_stat = {0,};
+	xfs_agblock_t		num_extents;
 
 	if (verbose)
 		do_log(_("        - agno = %d\n"), agno);
@@ -2530,8 +2719,8 @@ inject_lost_blocks(
 		if (error)
 			goto out_cancel;
 
-		error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG,
-					    XFS_AG_RESV_NONE);
+		error = -libxfs_free_extent(tp, *fsb, 1,
+				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
 		if (error)
 			goto out_cancel;
 


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors
  2019-10-29 23:45 [PATCH RFC 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
@ 2019-10-29 23:45 ` Darrick J. Wong
  0 siblings, 0 replies; 20+ messages in thread
From: Darrick J. Wong @ 2019-10-29 23:45 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Create some new support structures and functions to assist phase5 in
using the btree bulk loader to reconstruct metadata btrees.  This is the
first step in removing the open-coded rebuilding code.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |  231 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 210 insertions(+), 21 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index 4108e22b..ec236d4c 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -18,6 +18,7 @@
 #include "progress.h"
 #include "slab.h"
 #include "rmap.h"
+#include "bload.h"
 
 /*
  * we maintain the current slice (path from root to leaf)
@@ -65,6 +66,14 @@ typedef struct bt_status  {
 	uint64_t		owner;		/* owner */
 } bt_status_t;
 
+struct bt_rebuild {
+	struct xrep_newbt	newbt;
+	struct xfs_btree_bload	bload;
+	union {
+		struct xfs_slab_cursor	*slab_cursor;
+	};
+};
+
 /*
  * extra metadata for the agi
  */
@@ -306,6 +315,157 @@ _("error - not enough free space in filesystem\n"));
 #endif
 }
 
+/*
+ * Estimate proper slack values for a btree that's being reloaded.
+ *
+ * Under most circumstances, we'll take whatever default loading value the
+ * btree bulk loading code calculates for us.  However, there are some
+ * exceptions to this rule:
+ *
+ * (1) If someone turned one of the debug knobs.
+ * (2) The AG has less than ~9% space free.
+ *
+ * Note that we actually use 3/32 for the comparison to avoid division.
+ */
+static void
+estimate_ag_bload_slack(
+	struct repair_ctx	*sc,
+	struct xfs_btree_bload	*bload,
+	unsigned int		free)
+{
+	/*
+	 * The global values are set to -1 (i.e. take the bload defaults)
+	 * unless someone has set them otherwise, so we just pull the values
+	 * here.
+	 */
+	bload->leaf_slack = bload_leaf_slack;
+	bload->node_slack = bload_node_slack;
+
+	/* No further changes if there's more than 3/32ths space left. */
+	if (free >= ((sc->mp->m_sb.sb_agblocks * 3) >> 5))
+		return;
+
+	/* We're low on space; load the btrees as tightly as possible. */
+	if (bload->leaf_slack < 0)
+		bload->leaf_slack = 0;
+	if (bload->node_slack < 0)
+		bload->node_slack = 0;
+}
+
+/* Initialize a btree rebuild context. */
+static void
+init_rebuild(
+	struct repair_ctx		*sc,
+	const struct xfs_owner_info	*oinfo,
+	xfs_agblock_t			free_space,
+	struct bt_rebuild		*btr)
+{
+	memset(btr, 0, sizeof(struct bt_rebuild));
+
+	xrep_newbt_init_bare(&btr->newbt, sc);
+	btr->newbt.oinfo = *oinfo; /* struct copy */
+	estimate_ag_bload_slack(sc, &btr->bload, free_space);
+}
+
+/* Reserve blocks for the new btree. */
+static void
+setup_rebuild(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr,
+	uint32_t		nr_blocks)
+{
+	struct extent_tree_node	*ext_ptr;
+	struct extent_tree_node	*bno_ext_ptr;
+	uint32_t		blocks_allocated = 0;
+	int			error;
+
+	/*
+	 * grab the smallest extent and use it up, then get the
+	 * next smallest.  This mimics the init_*_cursor code.
+	 */
+	ext_ptr =  findfirst_bcnt_extent(agno);
+
+	/*
+	 * set up the free block array
+	 */
+	while (blocks_allocated < nr_blocks)  {
+		uint64_t	len;
+		xfs_agblock_t	new_start;
+		xfs_extlen_t	new_len;
+
+		if (!ext_ptr)
+			do_error(
+_("error - not enough free space in filesystem\n"));
+
+		/* Use up the extent we've got. */
+		len = min(ext_ptr->ex_blockcount,
+				btr->bload.nr_blocks - blocks_allocated);
+		error = xrep_newbt_add_reservation(&btr->newbt,
+				XFS_AGB_TO_FSB(mp, agno,
+					       ext_ptr->ex_startblock),
+				len, NULL);
+		if (error)
+			do_error(_("could not set up btree reservation: %s\n"),
+				strerror(-error));
+		blocks_allocated += len;
+
+		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
+				btr->newbt.oinfo.oi_owner);
+		if (error)
+			do_error(_("could not set up btree rmaps: %s\n"),
+				strerror(-error));
+
+		/* Figure out if we're putting anything back. */
+		new_start = ext_ptr->ex_startblock + len;
+		new_len = ext_ptr->ex_blockcount - len;
+
+		/* Delete the used-up extent from both extent trees. */
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, "releasing extent: %u [%u %u]\n",
+			agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
+#endif
+		bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
+		ASSERT(bno_ext_ptr != NULL);
+		get_bno_extent(agno, bno_ext_ptr);
+		release_extent_tree_node(bno_ext_ptr);
+
+		ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
+				ext_ptr->ex_blockcount);
+		ASSERT(ext_ptr != NULL);
+		release_extent_tree_node(ext_ptr);
+
+		/*
+		 * If we only used part of this last extent, then we need only
+		 * to reinsert the extent in the extent trees and we're done.
+		 */
+		if (new_len > 0) {
+			add_bno_extent(agno, new_start, new_len);
+			add_bcnt_extent(agno, new_start, new_len);
+			break;
+		}
+
+		/* Otherwise, find the next biggest extent. */
+		ext_ptr = findfirst_bcnt_extent(agno);
+	}
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "blocks_allocated = %d\n",
+		blocks_allocated);
+#endif
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+static int
+rebuild_alloc_block(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	void			*priv)
+{
+	struct bt_rebuild	*btr = priv;
+
+	return xrep_newbt_alloc_block(cur, &btr->newbt, ptr);
+}
+
 static void
 write_cursor(bt_status_t *curs)
 {
@@ -334,6 +494,34 @@ finish_cursor(bt_status_t *curs)
 	free(curs->btree_blocks);
 }
 
+static void
+finish_rebuild(
+	struct xfs_mount	*mp,
+	struct bt_rebuild	*btr)
+{
+	struct xrep_newbt_resv	*resv, *n;
+
+	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
+		xfs_agnumber_t	agno;
+		xfs_agblock_t	bno;
+		xfs_extlen_t	len;
+
+		if (resv->used >= resv->len)
+			continue;
+
+		/* XXX: Shouldn't this go on the AGFL? */
+		/* Put back everything we didn't use. */
+		bno = XFS_FSB_TO_AGBNO(mp, resv->fsbno + resv->used);
+		agno = XFS_FSB_TO_AGNO(mp, resv->fsbno + resv->used);
+		len = resv->len - resv->used;
+
+		add_bno_extent(agno, bno, len);
+		add_bcnt_extent(agno, bno, len);
+	}
+
+	xrep_newbt_destroy(&btr->newbt, 0);
+}
+
 /*
  * We need to leave some free records in the tree for the corner case of
  * setting up the AGFL. This may require allocation of blocks, and as
@@ -2211,28 +2399,29 @@ keep_fsinos(xfs_mount_t *mp)
 
 static void
 phase5_func(
-	xfs_mount_t	*mp,
-	xfs_agnumber_t	agno,
-	struct xfs_slab	*lost_fsb)
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct xfs_slab		*lost_fsb)
 {
-	uint64_t	num_inos;
-	uint64_t	num_free_inos;
-	uint64_t	finobt_num_inos;
-	uint64_t	finobt_num_free_inos;
-	bt_status_t	bno_btree_curs;
-	bt_status_t	bcnt_btree_curs;
-	bt_status_t	ino_btree_curs;
-	bt_status_t	fino_btree_curs;
-	bt_status_t	rmap_btree_curs;
-	bt_status_t	refcnt_btree_curs;
-	int		extra_blocks = 0;
-	uint		num_freeblocks;
-	xfs_extlen_t	freeblks1;
+	struct repair_ctx	sc = { .mp = mp, };
+	struct agi_stat		agi_stat = {0,};
+	uint64_t		num_inos;
+	uint64_t		num_free_inos;
+	uint64_t		finobt_num_inos;
+	uint64_t		finobt_num_free_inos;
+	bt_status_t		bno_btree_curs;
+	bt_status_t		bcnt_btree_curs;
+	bt_status_t		ino_btree_curs;
+	bt_status_t		fino_btree_curs;
+	bt_status_t		rmap_btree_curs;
+	bt_status_t		refcnt_btree_curs;
+	int			extra_blocks = 0;
+	uint			num_freeblocks;
+	xfs_extlen_t		freeblks1;
 #ifdef DEBUG
-	xfs_extlen_t	freeblks2;
+	xfs_extlen_t		freeblks2;
 #endif
-	xfs_agblock_t	num_extents;
-	struct agi_stat	agi_stat = {0,};
+	xfs_agblock_t		num_extents;
 
 	if (verbose)
 		do_log(_("        - agno = %d\n"), agno);
@@ -2454,8 +2643,8 @@ inject_lost_blocks(
 		if (error)
 			goto out_cancel;
 
-		error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG,
-					    XFS_AG_RESV_NONE);
+		error = -libxfs_free_extent(tp, *fsb, 1,
+				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
 		if (error)
 			goto out_cancel;
 


^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads: [~2020-05-29 21:08 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-01  1:21 [PATCH v2 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
2020-01-01  1:21 ` [PATCH 1/9] xfs_repair: port the online repair newbt structure Darrick J. Wong
2020-01-01  1:21 ` [PATCH 2/9] xfs_repair: unindent phase 5 function Darrick J. Wong
2020-01-01  1:21 ` [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors Darrick J. Wong
2020-01-01  1:21 ` [PATCH 4/9] xfs_repair: rebuild free space btrees with bulk loader Darrick J. Wong
2020-01-01  1:21 ` [PATCH 5/9] xfs_repair: rebuild inode " Darrick J. Wong
2020-01-01  1:22 ` [PATCH 6/9] xfs_repair: rebuild reverse mapping " Darrick J. Wong
2020-01-01  1:22 ` [PATCH 7/9] xfs_repair: rebuild refcount " Darrick J. Wong
2020-01-01  1:22 ` [PATCH 8/9] xfs_repair: remove old btree rebuild support code Darrick J. Wong
2020-01-01  1:22 ` [PATCH 9/9] xfs_repair: track blocks lost during btree construction via extents Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2020-05-20  1:50 [PATCH v5 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
2020-05-20  1:51 ` [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors Darrick J. Wong
2020-05-27 12:18   ` Brian Foster
2020-05-27 22:07     ` Darrick J. Wong
2020-05-28 15:09       ` Brian Foster
2020-05-29 21:08         ` Darrick J. Wong
2020-05-09 16:31 [PATCH v4 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
2020-05-09 16:31 ` [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors Darrick J. Wong
2020-05-14 15:11   ` Brian Foster
2020-05-14 19:47     ` Darrick J. Wong
2020-03-04  3:29 [PATCH v3 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
2020-03-04  3:29 ` [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors Darrick J. Wong
2019-10-29 23:45 [PATCH RFC 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
2019-10-29 23:45 ` [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors Darrick J. Wong
