* [PATCH v2 0/9] xfs_repair: use btree bulk loading
From: Darrick J. Wong @ 2020-01-01  1:21 UTC
  To: sandeen, darrick.wong; +Cc: linux-xfs

Hi all,

Refactor xfs_repair to use the btree bulk loading code instead of the
open-coded versions in phase5.c.  This isn't a full submission; it's
merely a demonstration of how the kernel patches can be used in
userspace.
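
For anyone who hasn't followed the kernel series, the new API stages a
fake btree root, computes the tree geometry for a given record count,
and then bulk loads the records.  Roughly, the shape is as follows (a
sketch only, using the bnobt as an example; mp, tp, agno, nr_records,
priv, and the two callbacks are stand-ins, not the exact repair code):

	struct xbtree_afakeroot	afake;
	struct xfs_btree_bload	bload = {
		.get_data	= get_record_cb,	/* feed one record */
		.alloc_block	= alloc_block_cb,	/* supply a new block */
		.leaf_slack	= -1,			/* take the defaults */
		.node_slack	= -1,
	};
	struct xfs_btree_cur	*cur;
	int			error;

	/* Size the new btree for the records we intend to put in it. */
	cur = libxfs_allocbt_stage_cursor(mp, tp, &afake, agno, XFS_BTNUM_BNO);
	error = -libxfs_btree_bload_compute_geometry(cur, &bload, nr_records);

	/* ...reserve bload.nr_blocks blocks, then load the records... */
	error = -libxfs_btree_bload(cur, &bload, priv);
	libxfs_btree_del_cursor(cur, error);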

If you're going to start using this mess, you probably ought to just
pull from my git tree, which is linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=repair-bulk-load


* [PATCH 1/9] xfs_repair: port the online repair newbt structure
From: Darrick J. Wong @ 2020-01-01  1:21 UTC
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Port the new btree staging context and related block reservation helper
code from the kernel to repair.  We'll use this in subsequent patches to
implement btree bulk loading.
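
To give a sense of how the pieces fit together, the intended lifecycle
is roughly as follows (a sketch only; sc, oinfo, and nr_blocks stand in
for whatever the caller has on hand):

	struct xrep_newbt	xnr;
	int			error;

	/* Set up accounting for a new AG btree owned by oinfo. */
	xrep_newbt_init_ag(&xnr, sc, oinfo, NULLFSBLOCK, XFS_AG_RESV_NONE);

	/* Reserve disk space for all the new btree blocks. */
	error = xrep_newbt_reserve_space(&xnr, nr_blocks);

	/* ...feed blocks to the bulk loader via xrep_newbt_alloc_block()... */

	/* Free unused reservations; tear everything down on error. */
	xrep_newbt_destroy(&xnr, error);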

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/libxfs_api_defs.h |    2 
 repair/Makefile          |    4 -
 repair/bload.c           |  276 ++++++++++++++++++++++++++++++++++++++++++++++
 repair/bload.h           |   79 +++++++++++++
 repair/xfs_repair.c      |   17 +++
 5 files changed, 376 insertions(+), 2 deletions(-)
 create mode 100644 repair/bload.c
 create mode 100644 repair/bload.h


diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 355f99a2..6bab5a70 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -50,6 +50,8 @@
 #define xfs_attr_remove			libxfs_attr_remove
 #define xfs_attr_leaf_newentsize	libxfs_attr_leaf_newentsize
 
+#define xfs_alloc_vextent		libxfs_alloc_vextent
+#define __xfs_bmap_add_free		__libxfs_bmap_add_free
 #define xfs_agfl_walk			libxfs_agfl_walk
 #define xfs_alloc_fix_freelist		libxfs_alloc_fix_freelist
 #define xfs_alloc_min_freelist		libxfs_alloc_min_freelist
diff --git a/repair/Makefile b/repair/Makefile
index 0964499a..8cc1ee68 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -9,11 +9,11 @@ LSRCFILES = README
 
 LTCOMMAND = xfs_repair
 
-HFILES = agheader.h attr_repair.h avl.h bmap.h btree.h \
+HFILES = agheader.h attr_repair.h avl.h bload.h bmap.h btree.h \
 	da_util.h dinode.h dir2.h err_protos.h globals.h incore.h protos.h \
 	rt.h progress.h scan.h versions.h prefetch.h rmap.h slab.h threads.h
 
-CFILES = agheader.c attr_repair.c avl.c bmap.c btree.c \
+CFILES = agheader.c attr_repair.c avl.c bload.c bmap.c btree.c \
 	da_util.c dino_chunks.c dinode.c dir2.c globals.c incore.c \
 	incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
 	phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
diff --git a/repair/bload.c b/repair/bload.c
new file mode 100644
index 00000000..896e2ae6
--- /dev/null
+++ b/repair/bload.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include <libxfs.h>
+#include "bload.h"
+
+#define trace_xrep_newbt_alloc_block(...)	((void) 0)
+#define trace_xrep_newbt_reserve_space(...)	((void) 0)
+#define trace_xrep_newbt_unreserve_space(...)	((void) 0)
+
+int bload_leaf_slack = -1;
+int bload_node_slack = -1;
+
+/* Ported routines from fs/xfs/scrub/repair.c */
+
+/*
+ * Roll a transaction, keeping the AG headers locked and reinitializing
+ * the btree cursors.
+ */
+int
+xrep_roll_ag_trans(
+	struct repair_ctx	*sc)
+{
+	int			error;
+
+	/* Keep the AG header buffers locked so we can keep going. */
+	if (sc->agi_bp)
+		libxfs_trans_bhold(sc->tp, sc->agi_bp);
+	if (sc->agf_bp)
+		libxfs_trans_bhold(sc->tp, sc->agf_bp);
+	if (sc->agfl_bp)
+		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
+
+	/*
+	 * Roll the transaction.  We still own the buffer and the buffer lock
+	 * regardless of whether or not the roll succeeds.  If the roll fails,
+	 * the buffers will be released during teardown on our way out of the
+	 * kernel.  If it succeeds, we join them to the new transaction and
+	 * move on.
+	 */
+	error = -libxfs_trans_roll(&sc->tp);
+	if (error)
+		return error;
+
+	/* Join AG headers to the new transaction. */
+	if (sc->agi_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
+	if (sc->agf_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
+	if (sc->agfl_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
+
+	return 0;
+}
+
+/* Initialize accounting resources for staging a new AG btree. */
+void
+xrep_newbt_init_ag(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc,
+	const struct xfs_owner_info	*oinfo,
+	xfs_fsblock_t			alloc_hint,
+	enum xfs_ag_resv_type		resv)
+{
+	memset(xnr, 0, sizeof(struct xrep_newbt));
+	xnr->sc = sc;
+	xnr->oinfo = *oinfo; /* structure copy */
+	xnr->alloc_hint = alloc_hint;
+	xnr->resv = resv;
+	INIT_LIST_HEAD(&xnr->reservations);
+}
+
+/* Initialize accounting resources for staging a new inode fork btree. */
+void
+xrep_newbt_init_inode(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc,
+	int				whichfork,
+	const struct xfs_owner_info	*oinfo)
+{
+	memset(xnr, 0, sizeof(struct xrep_newbt));
+	xnr->sc = sc;
+	xnr->oinfo = *oinfo; /* structure copy */
+	xnr->alloc_hint = XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino);
+	xnr->resv = XFS_AG_RESV_NONE;
+	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
+	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
+	INIT_LIST_HEAD(&xnr->reservations);
+}
+
+/*
+ * Initialize accounting resources for staging a new btree.  Callers are
+ * expected to add their own reservations (and clean them up) manually.
+ */
+void
+xrep_newbt_init_bare(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc)
+{
+	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
+			XFS_AG_RESV_NONE);
+}
+
+/* Add a space reservation manually. */
+int
+xrep_newbt_add_reservation(
+	struct xrep_newbt		*xnr,
+	xfs_fsblock_t			fsbno,
+	xfs_extlen_t			len,
+	void				*priv)
+{
+	struct xrep_newbt_resv	*resv;
+
+	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
+	if (!resv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&resv->list);
+	resv->fsbno = fsbno;
+	resv->len = len;
+	resv->used = 0;
+	resv->priv = priv;
+	list_add_tail(&resv->list, &xnr->reservations);
+	return 0;
+}
+
+/* Reserve disk space for our new btree. */
+int
+xrep_newbt_reserve_space(
+	struct xrep_newbt	*xnr,
+	uint64_t		nr_blocks)
+{
+	struct repair_ctx	*sc = xnr->sc;
+	xfs_alloctype_t		type;
+	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
+	int			error = 0;
+
+	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
+
+	while (nr_blocks > 0 && !error) {
+		struct xfs_alloc_arg	args = {
+			.tp		= sc->tp,
+			.mp		= sc->mp,
+			.type		= type,
+			.fsbno		= alloc_hint,
+			.oinfo		= xnr->oinfo,
+			.minlen		= 1,
+			.maxlen		= nr_blocks,
+			.prod		= nr_blocks,
+			.resv		= xnr->resv,
+		};
+
+		error = -libxfs_alloc_vextent(&args);
+		if (error)
+			return error;
+		if (args.fsbno == NULLFSBLOCK)
+			return -ENOSPC;
+
+		trace_xrep_newbt_reserve_space(sc->mp,
+				XFS_FSB_TO_AGNO(sc->mp, args.fsbno),
+				XFS_FSB_TO_AGBNO(sc->mp, args.fsbno),
+				args.len, xnr->oinfo.oi_owner);
+
+		/* We don't have real EFIs here so skip that. */
+
+		error = xrep_newbt_add_reservation(xnr, args.fsbno, args.len,
+				NULL);
+		if (error)
+			break;
+
+		nr_blocks -= args.len;
+		alloc_hint = args.fsbno + args.len - 1;
+
+		if (sc->ip)
+			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
+		else
+			error = xrep_roll_ag_trans(sc);
+	}
+
+	return error;
+}
+
+/* Free all the accounting info and disk space we reserved for a new btree. */
+void
+xrep_newbt_destroy(
+	struct xrep_newbt	*xnr,
+	int			error)
+{
+	struct repair_ctx	*sc = xnr->sc;
+	struct xrep_newbt_resv	*resv, *n;
+
+	if (error)
+		goto junkit;
+
+	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
+		/* We don't have EFIs here so skip the EFD. */
+
+		/* Free every block we didn't use. */
+		resv->fsbno += resv->used;
+		resv->len -= resv->used;
+		resv->used = 0;
+
+		if (resv->len > 0) {
+			trace_xrep_newbt_unreserve_space(sc->mp,
+					XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
+					XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
+					resv->len, xnr->oinfo.oi_owner);
+
+			__libxfs_bmap_add_free(sc->tp, resv->fsbno, resv->len,
+					&xnr->oinfo, true);
+		}
+
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
+
+junkit:
+	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
+
+	if (sc->ip) {
+		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
+		xnr->ifake.if_fork = NULL;
+	}
+}
+
+/* Feed one of the reserved btree blocks to the bulk loader. */
+int
+xrep_newbt_alloc_block(
+	struct xfs_btree_cur	*cur,
+	struct xrep_newbt	*xnr,
+	union xfs_btree_ptr	*ptr)
+{
+	struct xrep_newbt_resv	*resv;
+	xfs_fsblock_t		fsb;
+
+	/*
+	 * If last_resv doesn't have a block for us, move forward until we find
+	 * one that does (or run out of reservations).
+	 */
+	if (xnr->last_resv == NULL) {
+		list_for_each_entry(resv, &xnr->reservations, list) {
+			if (resv->used < resv->len) {
+				xnr->last_resv = resv;
+				break;
+			}
+		}
+		if (xnr->last_resv == NULL)
+			return -ENOSPC;
+	} else if (xnr->last_resv->used == xnr->last_resv->len) {
+		if (xnr->last_resv->list.next == &xnr->reservations)
+			return -ENOSPC;
+		xnr->last_resv = list_entry(xnr->last_resv->list.next,
+				struct xrep_newbt_resv, list);
+	}
+
+	/* Nab the block. */
+	fsb = xnr->last_resv->fsbno + xnr->last_resv->used;
+	xnr->last_resv->used++;
+
+	trace_xrep_newbt_alloc_block(cur->bc_mp,
+			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
+			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
+			xnr->oinfo.oi_owner);
+
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		ptr->l = cpu_to_be64(fsb);
+	else
+		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
+	return 0;
+}
diff --git a/repair/bload.h b/repair/bload.h
new file mode 100644
index 00000000..8f890157
--- /dev/null
+++ b/repair/bload.h
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_REPAIR_BLOAD_H__
+#define __XFS_REPAIR_BLOAD_H__
+
+extern int bload_leaf_slack;
+extern int bload_node_slack;
+
+struct repair_ctx {
+	struct xfs_mount	*mp;
+	struct xfs_inode	*ip;
+	struct xfs_trans	*tp;
+
+	struct xfs_buf		*agi_bp;
+	struct xfs_buf		*agf_bp;
+	struct xfs_buf		*agfl_bp;
+};
+
+struct xrep_newbt_resv {
+	/* Link to list of extents that we've reserved. */
+	struct list_head	list;
+
+	void			*priv;
+
+	/* FSB of the block we reserved. */
+	xfs_fsblock_t		fsbno;
+
+	/* Length of the reservation. */
+	xfs_extlen_t		len;
+
+	/* How much of this reservation we've used. */
+	xfs_extlen_t		used;
+};
+
+struct xrep_newbt {
+	struct repair_ctx	*sc;
+
+	/* List of extents that we've reserved. */
+	struct list_head	reservations;
+
+	/* Fake root for new btree. */
+	union {
+		struct xbtree_afakeroot	afake;
+		struct xbtree_ifakeroot	ifake;
+	};
+
+	/* rmap owner of these blocks */
+	struct xfs_owner_info	oinfo;
+
+	/* The last reservation we allocated from. */
+	struct xrep_newbt_resv	*last_resv;
+
+	/* Allocation hint */
+	xfs_fsblock_t		alloc_hint;
+
+	/* per-ag reservation type */
+	enum xfs_ag_resv_type	resv;
+};
+
+#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
+	list_for_each_entry_safe((resv), (n), &(xnr)->reservations, list)
+
+void xrep_newbt_init_bare(struct xrep_newbt *xba, struct repair_ctx *sc);
+void xrep_newbt_init_ag(struct xrep_newbt *xba, struct repair_ctx *sc,
+		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
+		enum xfs_ag_resv_type resv);
+void xrep_newbt_init_inode(struct xrep_newbt *xba, struct repair_ctx *sc,
+		int whichfork, const struct xfs_owner_info *oinfo);
+int xrep_newbt_add_reservation(struct xrep_newbt *xba, xfs_fsblock_t fsbno,
+		xfs_extlen_t len, void *priv);
+int xrep_newbt_reserve_space(struct xrep_newbt *xba, uint64_t nr_blocks);
+void xrep_newbt_destroy(struct xrep_newbt *xba, int error);
+int xrep_newbt_alloc_block(struct xfs_btree_cur *cur, struct xrep_newbt *xba,
+		union xfs_btree_ptr *ptr);
+
+#endif /* __XFS_REPAIR_BLOAD_H__ */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 66e2c335..ecfb1ff1 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -24,6 +24,7 @@
 #include "rmap.h"
 #include "libfrog/fsgeom.h"
 #include "libfrog/platform.h"
+#include "bload.h"
 
 /*
  * option tables for getsubopt calls
@@ -39,6 +40,8 @@ enum o_opt_nums {
 	AG_STRIDE,
 	FORCE_GEO,
 	PHASE2_THREADS,
+	BLOAD_LEAF_SLACK,
+	BLOAD_NODE_SLACK,
 	O_MAX_OPTS,
 };
 
@@ -49,6 +52,8 @@ static char *o_opts[] = {
 	[AG_STRIDE]		= "ag_stride",
 	[FORCE_GEO]		= "force_geometry",
 	[PHASE2_THREADS]	= "phase2_threads",
+	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
+	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
 	[O_MAX_OPTS]		= NULL,
 };
 
@@ -260,6 +265,18 @@ process_args(int argc, char **argv)
 		_("-o phase2_threads requires a parameter\n"));
 					phase2_threads = (int)strtol(val, NULL, 0);
 					break;
+				case BLOAD_LEAF_SLACK:
+					if (!val)
+						do_abort(
+		_("-o debug_bload_leaf_slack requires a parameter\n"));
+					bload_leaf_slack = (int)strtol(val, NULL, 0);
+					break;
+				case BLOAD_NODE_SLACK:
+					if (!val)
+						do_abort(
+		_("-o debug_bload_node_slack requires a parameter\n"));
+					bload_node_slack = (int)strtol(val, NULL, 0);
+					break;
 				default:
 					unknown('o', val);
 					break;
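
With this applied, the two debugging knobs are regular -o suboptions; a
hypothetical invocation that forces minimally packed btrees would look
like:

	# xfs_repair -o debug_bload_leaf_slack=0,debug_bload_node_slack=0 /dev/sdX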



* [PATCH 2/9] xfs_repair: unindent phase 5 function
From: Darrick J. Wong @ 2020-01-01  1:21 UTC
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Remove the unnecessary indent in phase5_func.  No functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |  309 +++++++++++++++++++++++++++----------------------------
 1 file changed, 154 insertions(+), 155 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index 7f7d3d18..4108e22b 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -2237,201 +2237,200 @@ phase5_func(
 	if (verbose)
 		do_log(_("        - agno = %d\n"), agno);
 
-	{
-		/*
-		 * build up incore bno and bcnt extent btrees
-		 */
-		num_extents = mk_incore_fstree(mp, agno);
+	/*
+	 * build up incore bno and bcnt extent btrees
+	 */
+	num_extents = mk_incore_fstree(mp, agno);
 
 #ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, "# of bno extents is %d\n",
-				count_bno_extents(agno));
+	fprintf(stderr, "# of bno extents is %d\n",
+			count_bno_extents(agno));
 #endif
 
-		if (num_extents == 0)  {
-			/*
-			 * XXX - what we probably should do here is pick an
-			 * inode for a regular file in the allocation group
-			 * that has space allocated and shoot it by traversing
-			 * the bmap list and putting all its extents on the
-			 * incore freespace trees, clearing the inode,
-			 * and clearing the in-use bit in the incore inode
-			 * tree.  Then try mk_incore_fstree() again.
-			 */
-			do_error(_("unable to rebuild AG %u.  "
-				  "Not enough free space in on-disk AG.\n"),
-				agno);
-		}
-
+	if (num_extents == 0)  {
 		/*
-		 * ok, now set up the btree cursors for the
-		 * on-disk btrees (includs pre-allocating all
-		 * required blocks for the trees themselves)
+		 * XXX - what we probably should do here is pick an
+		 * inode for a regular file in the allocation group
+		 * that has space allocated and shoot it by traversing
+		 * the bmap list and putting all its extents on the
+		 * incore freespace trees, clearing the inode,
+		 * and clearing the in-use bit in the incore inode
+		 * tree.  Then try mk_incore_fstree() again.
 		 */
-		init_ino_cursor(mp, agno, &ino_btree_curs, &num_inos,
-				&num_free_inos, 0);
+		do_error(_("unable to rebuild AG %u.  "
+			  "Not enough free space in on-disk AG.\n"),
+			agno);
+	}
 
-		if (xfs_sb_version_hasfinobt(&mp->m_sb))
-			init_ino_cursor(mp, agno, &fino_btree_curs,
-					&finobt_num_inos, &finobt_num_free_inos,
-					1);
+	/*
+	 * ok, now set up the btree cursors for the
+	 * on-disk btrees (includs pre-allocating all
+	 * required blocks for the trees themselves)
+	 */
+	init_ino_cursor(mp, agno, &ino_btree_curs, &num_inos,
+			&num_free_inos, 0);
 
-		sb_icount_ag[agno] += num_inos;
-		sb_ifree_ag[agno] += num_free_inos;
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		init_ino_cursor(mp, agno, &fino_btree_curs,
+				&finobt_num_inos, &finobt_num_free_inos,
+				1);
 
-		/*
-		 * Set up the btree cursors for the on-disk rmap btrees,
-		 * which includes pre-allocating all required blocks.
-		 */
-		init_rmapbt_cursor(mp, agno, &rmap_btree_curs);
+	sb_icount_ag[agno] += num_inos;
+	sb_ifree_ag[agno] += num_free_inos;
 
-		/*
-		 * Set up the btree cursors for the on-disk refcount btrees,
-		 * which includes pre-allocating all required blocks.
-		 */
-		init_refc_cursor(mp, agno, &refcnt_btree_curs);
+	/*
+	 * Set up the btree cursors for the on-disk rmap btrees,
+	 * which includes pre-allocating all required blocks.
+	 */
+	init_rmapbt_cursor(mp, agno, &rmap_btree_curs);
 
-		num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
+	/*
+	 * Set up the btree cursors for the on-disk refcount btrees,
+	 * which includes pre-allocating all required blocks.
+	 */
+	init_refc_cursor(mp, agno, &refcnt_btree_curs);
+
+	num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
+	/*
+	 * lose two blocks per AG -- the space tree roots
+	 * are counted as allocated since the space trees
+	 * always have roots
+	 */
+	sb_fdblocks_ag[agno] += num_freeblocks - 2;
+
+	if (num_extents == 0)  {
 		/*
-		 * lose two blocks per AG -- the space tree roots
-		 * are counted as allocated since the space trees
-		 * always have roots
+		 * XXX - what we probably should do here is pick an
+		 * inode for a regular file in the allocation group
+		 * that has space allocated and shoot it by traversing
+		 * the bmap list and putting all its extents on the
+		 * incore freespace trees, clearing the inode,
+		 * and clearing the in-use bit in the incore inode
+		 * tree.  Then try mk_incore_fstree() again.
 		 */
-		sb_fdblocks_ag[agno] += num_freeblocks - 2;
-
-		if (num_extents == 0)  {
-			/*
-			 * XXX - what we probably should do here is pick an
-			 * inode for a regular file in the allocation group
-			 * that has space allocated and shoot it by traversing
-			 * the bmap list and putting all its extents on the
-			 * incore freespace trees, clearing the inode,
-			 * and clearing the in-use bit in the incore inode
-			 * tree.  Then try mk_incore_fstree() again.
-			 */
-			do_error(
-			_("unable to rebuild AG %u.  No free space.\n"), agno);
-		}
+		do_error(
+		_("unable to rebuild AG %u.  No free space.\n"), agno);
+	}
 
 #ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, "# of bno extents is %d\n", num_extents);
+	fprintf(stderr, "# of bno extents is %d\n", num_extents);
 #endif
 
-		/*
-		 * track blocks that we might really lose
-		 */
-		extra_blocks = calculate_freespace_cursor(mp, agno,
-					&num_extents, &bno_btree_curs);
+	/*
+	 * track blocks that we might really lose
+	 */
+	extra_blocks = calculate_freespace_cursor(mp, agno,
+				&num_extents, &bno_btree_curs);
 
-		/*
-		 * freespace btrees live in the "free space" but
-		 * the filesystem treats AGFL blocks as allocated
-		 * since they aren't described by the freespace trees
-		 */
+	/*
+	 * freespace btrees live in the "free space" but
+	 * the filesystem treats AGFL blocks as allocated
+	 * since they aren't described by the freespace trees
+	 */
 
-		/*
-		 * see if we can fit all the extra blocks into the AGFL
-		 */
-		extra_blocks = (extra_blocks - libxfs_agfl_size(mp) > 0)
-				? extra_blocks - libxfs_agfl_size(mp)
-				: 0;
+	/*
+	 * see if we can fit all the extra blocks into the AGFL
+	 */
+	extra_blocks = (extra_blocks - libxfs_agfl_size(mp) > 0)
+			? extra_blocks - libxfs_agfl_size(mp)
+			: 0;
 
-		if (extra_blocks > 0)
-			sb_fdblocks_ag[agno] -= extra_blocks;
+	if (extra_blocks > 0)
+		sb_fdblocks_ag[agno] -= extra_blocks;
 
-		bcnt_btree_curs = bno_btree_curs;
+	bcnt_btree_curs = bno_btree_curs;
 
-		bno_btree_curs.owner = XFS_RMAP_OWN_AG;
-		bcnt_btree_curs.owner = XFS_RMAP_OWN_AG;
-		setup_cursor(mp, agno, &bno_btree_curs);
-		setup_cursor(mp, agno, &bcnt_btree_curs);
+	bno_btree_curs.owner = XFS_RMAP_OWN_AG;
+	bcnt_btree_curs.owner = XFS_RMAP_OWN_AG;
+	setup_cursor(mp, agno, &bno_btree_curs);
+	setup_cursor(mp, agno, &bcnt_btree_curs);
 
 #ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, "# of bno extents is %d\n",
-				count_bno_extents(agno));
-		fprintf(stderr, "# of bcnt extents is %d\n",
-				count_bcnt_extents(agno));
+	fprintf(stderr, "# of bno extents is %d\n",
+			count_bno_extents(agno));
+	fprintf(stderr, "# of bcnt extents is %d\n",
+			count_bcnt_extents(agno));
 #endif
 
-		/*
-		 * now rebuild the freespace trees
-		 */
-		freeblks1 = build_freespace_tree(mp, agno,
-					&bno_btree_curs, XFS_BTNUM_BNO);
+	/*
+	 * now rebuild the freespace trees
+	 */
+	freeblks1 = build_freespace_tree(mp, agno,
+				&bno_btree_curs, XFS_BTNUM_BNO);
 #ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, "# of free blocks == %d\n", freeblks1);
+	fprintf(stderr, "# of free blocks == %d\n", freeblks1);
 #endif
-		write_cursor(&bno_btree_curs);
+	write_cursor(&bno_btree_curs);
 
 #ifdef DEBUG
-		freeblks2 = build_freespace_tree(mp, agno,
-					&bcnt_btree_curs, XFS_BTNUM_CNT);
+	freeblks2 = build_freespace_tree(mp, agno,
+				&bcnt_btree_curs, XFS_BTNUM_CNT);
 #else
-		(void) build_freespace_tree(mp, agno,
-					&bcnt_btree_curs, XFS_BTNUM_CNT);
+	(void) build_freespace_tree(mp, agno,
+				&bcnt_btree_curs, XFS_BTNUM_CNT);
 #endif
-		write_cursor(&bcnt_btree_curs);
+	write_cursor(&bcnt_btree_curs);
 
-		ASSERT(freeblks1 == freeblks2);
+	ASSERT(freeblks1 == freeblks2);
 
-		if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
-			build_rmap_tree(mp, agno, &rmap_btree_curs);
-			write_cursor(&rmap_btree_curs);
-			sb_fdblocks_ag[agno] += (rmap_btree_curs.num_tot_blocks -
-					rmap_btree_curs.num_free_blocks) - 1;
-		}
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+		build_rmap_tree(mp, agno, &rmap_btree_curs);
+		write_cursor(&rmap_btree_curs);
+		sb_fdblocks_ag[agno] += (rmap_btree_curs.num_tot_blocks -
+				rmap_btree_curs.num_free_blocks) - 1;
+	}
 
-		if (xfs_sb_version_hasreflink(&mp->m_sb)) {
-			build_refcount_tree(mp, agno, &refcnt_btree_curs);
-			write_cursor(&refcnt_btree_curs);
-		}
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		build_refcount_tree(mp, agno, &refcnt_btree_curs);
+		write_cursor(&refcnt_btree_curs);
+	}
 
-		/*
-		 * set up agf and agfl
-		 */
-		build_agf_agfl(mp, agno, &bno_btree_curs,
-				&bcnt_btree_curs, freeblks1, extra_blocks,
-				&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
-		/*
-		 * build inode allocation tree.
-		 */
-		build_ino_tree(mp, agno, &ino_btree_curs, XFS_BTNUM_INO,
-				&agi_stat);
-		write_cursor(&ino_btree_curs);
+	/*
+	 * set up agf and agfl
+	 */
+	build_agf_agfl(mp, agno, &bno_btree_curs,
+			&bcnt_btree_curs, freeblks1, extra_blocks,
+			&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
+	/*
+	 * build inode allocation tree.
+	 */
+	build_ino_tree(mp, agno, &ino_btree_curs, XFS_BTNUM_INO,
+			&agi_stat);
+	write_cursor(&ino_btree_curs);
 
-		/*
-		 * build free inode tree
-		 */
-		if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
-			build_ino_tree(mp, agno, &fino_btree_curs,
-					XFS_BTNUM_FINO, NULL);
-			write_cursor(&fino_btree_curs);
-		}
+	/*
+	 * build free inode tree
+	 */
+	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+		build_ino_tree(mp, agno, &fino_btree_curs,
+				XFS_BTNUM_FINO, NULL);
+		write_cursor(&fino_btree_curs);
+	}
 
-		/* build the agi */
-		build_agi(mp, agno, &ino_btree_curs, &fino_btree_curs,
-			  &agi_stat);
+	/* build the agi */
+	build_agi(mp, agno, &ino_btree_curs, &fino_btree_curs,
+		  &agi_stat);
 
-		/*
-		 * tear down cursors
-		 */
-		finish_cursor(&bno_btree_curs);
-		finish_cursor(&ino_btree_curs);
-		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-			finish_cursor(&rmap_btree_curs);
-		if (xfs_sb_version_hasreflink(&mp->m_sb))
-			finish_cursor(&refcnt_btree_curs);
-		if (xfs_sb_version_hasfinobt(&mp->m_sb))
-			finish_cursor(&fino_btree_curs);
-		finish_cursor(&bcnt_btree_curs);
+	/*
+	 * tear down cursors
+	 */
+	finish_cursor(&bno_btree_curs);
+	finish_cursor(&ino_btree_curs);
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		finish_cursor(&rmap_btree_curs);
+	if (xfs_sb_version_hasreflink(&mp->m_sb))
+		finish_cursor(&refcnt_btree_curs);
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		finish_cursor(&fino_btree_curs);
+	finish_cursor(&bcnt_btree_curs);
+
+	/*
+	 * release the incore per-AG bno/bcnt trees so
+	 * the extent nodes can be recycled
+	 */
+	release_agbno_extent_tree(agno);
+	release_agbcnt_extent_tree(agno);
 
-		/*
-		 * release the incore per-AG bno/bcnt trees so
-		 * the extent nodes can be recycled
-		 */
-		release_agbno_extent_tree(agno);
-		release_agbcnt_extent_tree(agno);
-	}
 	PROG_RPT_INC(prog_rpt_done[agno], 1);
 }
 



* [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors
From: Darrick J. Wong @ 2020-01-01  1:21 UTC
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Create some new support structures and functions to assist phase5 in
using the btree bulk loader to reconstruct metadata btrees.  This is the
first step in removing the open-coded rebuilding code.
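
The rough usage pattern for the new helpers, as wired up over the next
few patches (a sketch only; error handling and the geometry computation
that sets bload.nr_blocks are elided):

	struct bt_rebuild	btr;

	/* Set slack values and block ownership for the new btree. */
	init_rebuild(sc, &XFS_RMAP_OINFO_AG, free_space, &btr);

	/* Reserve the blocks that the geometry computation asked for. */
	setup_rebuild(sc->mp, agno, &btr, btr.bload.nr_blocks);

	/* ...bulk load, with rebuild_alloc_block handing over blocks... */

	/* Return whatever we didn't use to the incore extent trees. */
	finish_rebuild(sc->mp, &btr);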

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |  231 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 210 insertions(+), 21 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index 4108e22b..ec236d4c 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -18,6 +18,7 @@
 #include "progress.h"
 #include "slab.h"
 #include "rmap.h"
+#include "bload.h"
 
 /*
  * we maintain the current slice (path from root to leaf)
@@ -65,6 +66,14 @@ typedef struct bt_status  {
 	uint64_t		owner;		/* owner */
 } bt_status_t;
 
+struct bt_rebuild {
+	struct xrep_newbt	newbt;
+	struct xfs_btree_bload	bload;
+	union {
+		struct xfs_slab_cursor	*slab_cursor;
+	};
+};
+
 /*
  * extra metadata for the agi
  */
@@ -306,6 +315,157 @@ _("error - not enough free space in filesystem\n"));
 #endif
 }
 
+/*
+ * Estimate proper slack values for a btree that's being reloaded.
+ *
+ * Under most circumstances, we'll take whatever default loading value the
+ * btree bulk loading code calculates for us.  However, there are some
+ * exceptions to this rule:
+ *
+ * (1) If someone turned one of the debug knobs.
+ * (2) The AG has less than ~9% space free.
+ *
+ * Note that we actually use 3/32 for the comparison to avoid division.
+ */
+static void
+estimate_ag_bload_slack(
+	struct repair_ctx	*sc,
+	struct xfs_btree_bload	*bload,
+	unsigned int		free)
+{
+	/*
+	 * The global values are set to -1 (i.e. take the bload defaults)
+	 * unless someone has set them otherwise, so we just pull the values
+	 * here.
+	 */
+	bload->leaf_slack = bload_leaf_slack;
+	bload->node_slack = bload_node_slack;
+
+	/* No further changes if there's more than 3/32ths space left. */
+	if (free >= ((sc->mp->m_sb.sb_agblocks * 3) >> 5))
+		return;
+
+	/* We're low on space; load the btrees as tightly as possible. */
+	if (bload->leaf_slack < 0)
+		bload->leaf_slack = 0;
+	if (bload->node_slack < 0)
+		bload->node_slack = 0;
+}
+
+/* Initialize a btree rebuild context. */
+static void
+init_rebuild(
+	struct repair_ctx		*sc,
+	const struct xfs_owner_info	*oinfo,
+	xfs_agblock_t			free_space,
+	struct bt_rebuild		*btr)
+{
+	memset(btr, 0, sizeof(struct bt_rebuild));
+
+	xrep_newbt_init_bare(&btr->newbt, sc);
+	btr->newbt.oinfo = *oinfo; /* struct copy */
+	estimate_ag_bload_slack(sc, &btr->bload, free_space);
+}
+
+/* Reserve blocks for the new btree. */
+static void
+setup_rebuild(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr,
+	uint32_t		nr_blocks)
+{
+	struct extent_tree_node	*ext_ptr;
+	struct extent_tree_node	*bno_ext_ptr;
+	uint32_t		blocks_allocated = 0;
+	int			error;
+
+	/*
+	 * grab the smallest extent and use it up, then get the
+	 * next smallest.  This mimics the init_*_cursor code.
+	 */
+	ext_ptr = findfirst_bcnt_extent(agno);
+
+	/*
+	 * set up the free block array
+	 */
+	while (blocks_allocated < nr_blocks)  {
+		uint64_t	len;
+		xfs_agblock_t	new_start;
+		xfs_extlen_t	new_len;
+
+		if (!ext_ptr)
+			do_error(
+_("error - not enough free space in filesystem\n"));
+
+		/* Use up the extent we've got. */
+		len = min(ext_ptr->ex_blockcount,
+				btr->bload.nr_blocks - blocks_allocated);
+		error = xrep_newbt_add_reservation(&btr->newbt,
+				XFS_AGB_TO_FSB(mp, agno,
+					       ext_ptr->ex_startblock),
+				len, NULL);
+		if (error)
+			do_error(_("could not set up btree reservation: %s\n"),
+				strerror(-error));
+		blocks_allocated += len;
+
+		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
+				btr->newbt.oinfo.oi_owner);
+		if (error)
+			do_error(_("could not set up btree rmaps: %s\n"),
+				strerror(-error));
+
+		/* Figure out if we're putting anything back. */
+		new_start = ext_ptr->ex_startblock + len;
+		new_len = ext_ptr->ex_blockcount - len;
+
+		/* Delete the used-up extent from both extent trees. */
+#ifdef XR_BLD_FREE_TRACE
+		fprintf(stderr, "releasing extent: %u [%u %u]\n",
+			agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
+#endif
+		bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
+		ASSERT(bno_ext_ptr != NULL);
+		get_bno_extent(agno, bno_ext_ptr);
+		release_extent_tree_node(bno_ext_ptr);
+
+		ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
+				ext_ptr->ex_blockcount);
+		ASSERT(ext_ptr != NULL);
+		release_extent_tree_node(ext_ptr);
+
+		/*
+		 * If we only used part of this last extent, then we need only
+		 * to reinsert the extent in the extent trees and we're done.
+		 */
+		if (new_len > 0) {
+			add_bno_extent(agno, new_start, new_len);
+			add_bcnt_extent(agno, new_start, new_len);
+			break;
+		}
+
+		/* Otherwise, find the next smallest extent. */
+		ext_ptr = findfirst_bcnt_extent(agno);
+	}
+#ifdef XR_BLD_FREE_TRACE
+	fprintf(stderr, "blocks_allocated = %d\n",
+		blocks_allocated);
+#endif
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+static int
+rebuild_alloc_block(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	void			*priv)
+{
+	struct bt_rebuild	*btr = priv;
+
+	return xrep_newbt_alloc_block(cur, &btr->newbt, ptr);
+}
+
 static void
 write_cursor(bt_status_t *curs)
 {
@@ -334,6 +494,34 @@ finish_cursor(bt_status_t *curs)
 	free(curs->btree_blocks);
 }
 
+static void
+finish_rebuild(
+	struct xfs_mount	*mp,
+	struct bt_rebuild	*btr)
+{
+	struct xrep_newbt_resv	*resv, *n;
+
+	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
+		xfs_agnumber_t	agno;
+		xfs_agblock_t	bno;
+		xfs_extlen_t	len;
+
+		if (resv->used >= resv->len)
+			continue;
+
+		/* XXX: Shouldn't this go on the AGFL? */
+		/* Put back everything we didn't use. */
+		bno = XFS_FSB_TO_AGBNO(mp, resv->fsbno + resv->used);
+		agno = XFS_FSB_TO_AGNO(mp, resv->fsbno + resv->used);
+		len = resv->len - resv->used;
+
+		add_bno_extent(agno, bno, len);
+		add_bcnt_extent(agno, bno, len);
+	}
+
+	xrep_newbt_destroy(&btr->newbt, 0);
+}
+
 /*
  * We need to leave some free records in the tree for the corner case of
  * setting up the AGFL. This may require allocation of blocks, and as
@@ -2211,28 +2399,29 @@ keep_fsinos(xfs_mount_t *mp)
 
 static void
 phase5_func(
-	xfs_mount_t	*mp,
-	xfs_agnumber_t	agno,
-	struct xfs_slab	*lost_fsb)
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct xfs_slab		*lost_fsb)
 {
-	uint64_t	num_inos;
-	uint64_t	num_free_inos;
-	uint64_t	finobt_num_inos;
-	uint64_t	finobt_num_free_inos;
-	bt_status_t	bno_btree_curs;
-	bt_status_t	bcnt_btree_curs;
-	bt_status_t	ino_btree_curs;
-	bt_status_t	fino_btree_curs;
-	bt_status_t	rmap_btree_curs;
-	bt_status_t	refcnt_btree_curs;
-	int		extra_blocks = 0;
-	uint		num_freeblocks;
-	xfs_extlen_t	freeblks1;
+	struct repair_ctx	sc = { .mp = mp, };
+	struct agi_stat		agi_stat = {0,};
+	uint64_t		num_inos;
+	uint64_t		num_free_inos;
+	uint64_t		finobt_num_inos;
+	uint64_t		finobt_num_free_inos;
+	bt_status_t		bno_btree_curs;
+	bt_status_t		bcnt_btree_curs;
+	bt_status_t		ino_btree_curs;
+	bt_status_t		fino_btree_curs;
+	bt_status_t		rmap_btree_curs;
+	bt_status_t		refcnt_btree_curs;
+	int			extra_blocks = 0;
+	uint			num_freeblocks;
+	xfs_extlen_t		freeblks1;
 #ifdef DEBUG
-	xfs_extlen_t	freeblks2;
+	xfs_extlen_t		freeblks2;
 #endif
-	xfs_agblock_t	num_extents;
-	struct agi_stat	agi_stat = {0,};
+	xfs_agblock_t		num_extents;
 
 	if (verbose)
 		do_log(_("        - agno = %d\n"), agno);
@@ -2454,8 +2643,8 @@ inject_lost_blocks(
 		if (error)
 			goto out_cancel;
 
-		error = -libxfs_free_extent(tp, *fsb, 1, &XFS_RMAP_OINFO_AG,
-					    XFS_AG_RESV_NONE);
+		error = -libxfs_free_extent(tp, *fsb, 1,
+				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
 		if (error)
 			goto out_cancel;
 



* [PATCH 4/9] xfs_repair: rebuild free space btrees with bulk loader
From: Darrick J. Wong @ 2020-01-01  1:21 UTC
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use the btree bulk loading functions to rebuild the free space btrees
and drop the open-coded implementation.
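
The bulk loader pulls records through the bload's get_data callback, so
each rebuild boils down to pointing that callback at the incore extent
tree and walking it.  Condensed from build_bnobt below (not the complete
function; setup and error handling elided):

	btr_bno->bload.get_data = get_bnobt_data;
	btr_bno->bload.alloc_block = rebuild_alloc_block;
	btr_bno->bno_rec = findfirst_bno_extent(agno);

	cur = libxfs_allocbt_stage_cursor(sc->mp, sc->tp,
			&btr_bno->newbt.afake, agno, XFS_BTNUM_BNO);
	error = -libxfs_btree_bload(cur, &btr_bno->bload, btr_bno);
	libxfs_btree_del_cursor(cur, 0);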

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/libxfs_api_defs.h |    3 
 repair/phase5.c          |  859 ++++++++++++++--------------------------------
 2 files changed, 260 insertions(+), 602 deletions(-)


diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 6bab5a70..60dc9297 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -178,5 +178,8 @@
 #define xfs_ialloc_calc_rootino		libxfs_ialloc_calc_rootino
 
 #define xfs_sb_read_secondary		libxfs_sb_read_secondary
+#define xfs_btree_bload_compute_geometry libxfs_btree_bload_compute_geometry
+#define xfs_btree_bload			libxfs_btree_bload
+#define xfs_allocbt_stage_cursor	libxfs_allocbt_stage_cursor
 
 #endif /* __LIBXFS_API_DEFS_H__ */
diff --git a/repair/phase5.c b/repair/phase5.c
index ec236d4c..2421c4bc 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -71,6 +71,10 @@ struct bt_rebuild {
 	struct xfs_btree_bload	bload;
 	union {
 		struct xfs_slab_cursor	*slab_cursor;
+		struct {
+			struct extent_tree_node	*bno_rec;
+			xfs_agblock_t		*freeblks;
+		};
 	};
 };
 
@@ -88,7 +92,10 @@ static uint64_t	*sb_ifree_ag;		/* free inodes per ag */
 static uint64_t	*sb_fdblocks_ag;	/* free data blocks per ag */
 
 static int
-mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
+mk_incore_fstree(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	unsigned int		*num_freeblocks)
 {
 	int			in_extent;
 	int			num_extents;
@@ -100,6 +107,8 @@ mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
 	xfs_extlen_t		blen;
 	int			bstate;
 
+	*num_freeblocks = 0;
+
 	/*
 	 * scan the bitmap for the ag looking for continuous
 	 * extents of free blocks.  At this point, we know
@@ -155,6 +164,7 @@ mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
 #endif
 				add_bno_extent(agno, extent_start, extent_len);
 				add_bcnt_extent(agno, extent_start, extent_len);
+				*num_freeblocks += extent_len;
 			}
 		}
 	}
@@ -168,6 +178,7 @@ mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
 #endif
 		add_bno_extent(agno, extent_start, extent_len);
 		add_bcnt_extent(agno, extent_start, extent_len);
+		*num_freeblocks += extent_len;
 	}
 
 	return(num_extents);
@@ -494,313 +505,32 @@ finish_cursor(bt_status_t *curs)
 	free(curs->btree_blocks);
 }
 
+/*
+ * Scoop up leftovers from a rebuild cursor for later freeing, then free the
+ * rebuild context.
+ */
 static void
 finish_rebuild(
 	struct xfs_mount	*mp,
-	struct bt_rebuild	*btr)
+	struct bt_rebuild	*btr,
+	struct xfs_slab		*lost_fsb)
 {
 	struct xrep_newbt_resv	*resv, *n;
 
 	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
-		xfs_agnumber_t	agno;
-		xfs_agblock_t	bno;
-		xfs_extlen_t	len;
-
-		if (resv->used >= resv->len)
-			continue;
-
-		/* XXX: Shouldn't this go on the AGFL? */
-		/* Put back everything we didn't use. */
-		bno = XFS_FSB_TO_AGBNO(mp, resv->fsbno + resv->used);
-		agno = XFS_FSB_TO_AGNO(mp, resv->fsbno + resv->used);
-		len = resv->len - resv->used;
-
-		add_bno_extent(agno, bno, len);
-		add_bcnt_extent(agno, bno, len);
-	}
-
-	xrep_newbt_destroy(&btr->newbt, 0);
-}
-
-/*
- * We need to leave some free records in the tree for the corner case of
- * setting up the AGFL. This may require allocation of blocks, and as
- * such can require insertion of new records into the tree (e.g. moving
- * a record in the by-count tree when a long extent is shortened). If we
- * pack the records into the leaves with no slack space, this requires a
- * leaf split to occur and a block to be allocated from the free list.
- * If we don't have any blocks on the free list (because we are setting
- * it up!), then we fail, and the filesystem will fail with the same
- * failure at runtime. Hence leave a couple of records slack space in
- * each block to allow immediate modification of the tree without
- * requiring splits to be done.
- *
- * XXX(hch): any reason we don't just look at mp->m_alloc_mxr?
- */
-#define XR_ALLOC_BLOCK_MAXRECS(mp, level) \
-	(libxfs_allocbt_maxrecs((mp), (mp)->m_sb.sb_blocksize, (level) == 0) - 2)
-
-/*
- * this calculates a freespace cursor for an ag.
- * btree_curs is an in/out.  returns the number of
- * blocks that will show up in the AGFL.
- */
-static int
-calculate_freespace_cursor(xfs_mount_t *mp, xfs_agnumber_t agno,
-			xfs_agblock_t *extents, bt_status_t *btree_curs)
-{
-	xfs_extlen_t		blocks_needed;		/* a running count */
-	xfs_extlen_t		blocks_allocated_pt;	/* per tree */
-	xfs_extlen_t		blocks_allocated_total;	/* for both trees */
-	xfs_agblock_t		num_extents;
-	int			i;
-	int			extents_used;
-	int			extra_blocks;
-	bt_stat_level_t		*lptr;
-	bt_stat_level_t		*p_lptr;
-	extent_tree_node_t	*ext_ptr;
-	int			level;
-
-	num_extents = *extents;
-	extents_used = 0;
-
-	ASSERT(num_extents != 0);
-
-	lptr = &btree_curs->level[0];
-	btree_curs->init = 1;
-
-	/*
-	 * figure out how much space we need for the leaf level
-	 * of the tree and set up the cursor for the leaf level
-	 * (note that the same code is duplicated further down)
-	 */
-	lptr->num_blocks = howmany(num_extents, XR_ALLOC_BLOCK_MAXRECS(mp, 0));
-	lptr->num_recs_pb = num_extents / lptr->num_blocks;
-	lptr->modulo = num_extents % lptr->num_blocks;
-	lptr->num_recs_tot = num_extents;
-	level = 1;
-
-#ifdef XR_BLD_FREE_TRACE
-	fprintf(stderr, "%s 0 %d %d %d %d\n", __func__,
-			lptr->num_blocks,
-			lptr->num_recs_pb,
-			lptr->modulo,
-			lptr->num_recs_tot);
-#endif
-	/*
-	 * if we need more levels, set them up.  # of records
-	 * per level is the # of blocks in the level below it
-	 */
-	if (lptr->num_blocks > 1)  {
-		for (; btree_curs->level[level - 1].num_blocks > 1
-				&& level < XFS_BTREE_MAXLEVELS;
-				level++)  {
-			lptr = &btree_curs->level[level];
-			p_lptr = &btree_curs->level[level - 1];
-			lptr->num_blocks = howmany(p_lptr->num_blocks,
-					XR_ALLOC_BLOCK_MAXRECS(mp, level));
-			lptr->modulo = p_lptr->num_blocks
-					% lptr->num_blocks;
-			lptr->num_recs_pb = p_lptr->num_blocks
-					/ lptr->num_blocks;
-			lptr->num_recs_tot = p_lptr->num_blocks;
-#ifdef XR_BLD_FREE_TRACE
-			fprintf(stderr, "%s %d %d %d %d %d\n", __func__,
-					level,
-					lptr->num_blocks,
-					lptr->num_recs_pb,
-					lptr->modulo,
-					lptr->num_recs_tot);
-#endif
-		}
-	}
+		while (resv->used < resv->len) {
+			xfs_fsblock_t	fsb = resv->fsbno + resv->used;
+			int		error;
 
-	ASSERT(lptr->num_blocks == 1);
-	btree_curs->num_levels = level;
-
-	/*
-	 * ok, now we have a hypothetical cursor that
-	 * will work for both the bno and bcnt trees.
-	 * now figure out if using up blocks to set up the
-	 * trees will perturb the shape of the freespace tree.
-	 * if so, we've over-allocated.  the freespace trees
-	 * as they will be *after* accounting for the free space
-	 * we've used up will need fewer blocks to to represent
-	 * than we've allocated.  We can use the AGFL to hold
-	 * xfs_agfl_size (sector/xfs_agfl_t) blocks but that's it.
-	 * Thus we limit things to xfs_agfl_size/2 for each of the 2 btrees.
-	 * if the number of extra blocks is more than that,
-	 * we'll have to be called again.
-	 */
-	for (blocks_needed = 0, i = 0; i < level; i++)  {
-		blocks_needed += btree_curs->level[i].num_blocks;
-	}
-
-	/*
-	 * record the # of blocks we've allocated
-	 */
-	blocks_allocated_pt = blocks_needed;
-	blocks_needed *= 2;
-	blocks_allocated_total = blocks_needed;
-
-	/*
-	 * figure out how many free extents will be used up by
-	 * our space allocation
-	 */
-	if ((ext_ptr = findfirst_bcnt_extent(agno)) == NULL)
-		do_error(_("can't rebuild fs trees -- not enough free space "
-			   "on ag %u\n"), agno);
-
-	while (ext_ptr != NULL && blocks_needed > 0)  {
-		if (ext_ptr->ex_blockcount <= blocks_needed)  {
-			blocks_needed -= ext_ptr->ex_blockcount;
-			extents_used++;
-		} else  {
-			blocks_needed = 0;
-		}
-
-		ext_ptr = findnext_bcnt_extent(agno, ext_ptr);
-
-#ifdef XR_BLD_FREE_TRACE
-		if (ext_ptr != NULL)  {
-			fprintf(stderr, "got next extent [%u %u]\n",
-				ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
-		} else  {
-			fprintf(stderr, "out of extents\n");
-		}
-#endif
-	}
-	if (blocks_needed > 0)
-		do_error(_("ag %u - not enough free space to build freespace "
-			   "btrees\n"), agno);
-
-	ASSERT(num_extents >= extents_used);
-
-	num_extents -= extents_used;
-
-	/*
-	 * see if the number of leaf blocks will change as a result
-	 * of the number of extents changing
-	 */
-	if (howmany(num_extents, XR_ALLOC_BLOCK_MAXRECS(mp, 0))
-			!= btree_curs->level[0].num_blocks)  {
-		/*
-		 * yes -- recalculate the cursor.  If the number of
-		 * excess (overallocated) blocks is < xfs_agfl_size/2, we're ok.
-		 * we can put those into the AGFL.  we don't try
-		 * and get things to converge exactly (reach a
-		 * state with zero excess blocks) because there
-		 * exist pathological cases which will never
-		 * converge.  first, check for the zero-case.
-		 */
-		if (num_extents == 0)  {
-			/*
-			 * ok, we've used up all the free blocks
-			 * trying to lay out the leaf level. go
-			 * to a one block (empty) btree and put the
-			 * already allocated blocks into the AGFL
-			 */
-			if (btree_curs->level[0].num_blocks != 1)  {
-				/*
-				 * we really needed more blocks because
-				 * the old tree had more than one level.
-				 * this is bad.
-				 */
-				 do_warn(_("not enough free blocks left to "
-					   "describe all free blocks in AG "
-					   "%u\n"), agno);
-			}
-#ifdef XR_BLD_FREE_TRACE
-			fprintf(stderr,
-				"ag %u -- no free extents, alloc'ed %d\n",
-				agno, blocks_allocated_pt);
-#endif
-			lptr->num_blocks = 1;
-			lptr->modulo = 0;
-			lptr->num_recs_pb = 0;
-			lptr->num_recs_tot = 0;
-
-			btree_curs->num_levels = 1;
-
-			/*
-			 * don't reset the allocation stats, assume
-			 * they're all extra blocks
-			 * don't forget to return the total block count
-			 * not the per-tree block count.  these are the
-			 * extras that will go into the AGFL.  subtract
-			 * two for the root blocks.
-			 */
-			btree_curs->num_tot_blocks = blocks_allocated_pt;
-			btree_curs->num_free_blocks = blocks_allocated_pt;
-
-			*extents = 0;
-
-			return(blocks_allocated_total - 2);
-		}
-
-		lptr = &btree_curs->level[0];
-		lptr->num_blocks = howmany(num_extents,
-					XR_ALLOC_BLOCK_MAXRECS(mp, 0));
-		lptr->num_recs_pb = num_extents / lptr->num_blocks;
-		lptr->modulo = num_extents % lptr->num_blocks;
-		lptr->num_recs_tot = num_extents;
-		level = 1;
-
-		/*
-		 * if we need more levels, set them up
-		 */
-		if (lptr->num_blocks > 1)  {
-			for (level = 1; btree_curs->level[level-1].num_blocks
-					> 1 && level < XFS_BTREE_MAXLEVELS;
-					level++)  {
-				lptr = &btree_curs->level[level];
-				p_lptr = &btree_curs->level[level-1];
-				lptr->num_blocks = howmany(p_lptr->num_blocks,
-					XR_ALLOC_BLOCK_MAXRECS(mp, level));
-				lptr->modulo = p_lptr->num_blocks
-						% lptr->num_blocks;
-				lptr->num_recs_pb = p_lptr->num_blocks
-						/ lptr->num_blocks;
-				lptr->num_recs_tot = p_lptr->num_blocks;
-			}
-		}
-		ASSERT(lptr->num_blocks == 1);
-		btree_curs->num_levels = level;
-
-		/*
-		 * now figure out the number of excess blocks
-		 */
-		for (blocks_needed = 0, i = 0; i < level; i++)  {
-			blocks_needed += btree_curs->level[i].num_blocks;
-		}
-		blocks_needed *= 2;
-
-		ASSERT(blocks_allocated_total >= blocks_needed);
-		extra_blocks = blocks_allocated_total - blocks_needed;
-	} else  {
-		if (extents_used > 0) {
-			/*
-			 * reset the leaf level geometry to account
-			 * for consumed extents.  we can leave the
-			 * rest of the cursor alone since the number
-			 * of leaf blocks hasn't changed.
-			 */
-			lptr = &btree_curs->level[0];
-
-			lptr->num_recs_pb = num_extents / lptr->num_blocks;
-			lptr->modulo = num_extents % lptr->num_blocks;
-			lptr->num_recs_tot = num_extents;
+			error = slab_add(lost_fsb, &fsb);
+			if (error)
+				do_error(
+_("Insufficient memory saving lost blocks.\n"));
+			resv->used++;
 		}
-
-		extra_blocks = 0;
 	}
 
-	btree_curs->num_tot_blocks = blocks_allocated_pt;
-	btree_curs->num_free_blocks = blocks_allocated_pt;
-
-	*extents = num_extents;
-
-	return(extra_blocks);
+	xrep_newbt_destroy(&btr->newbt, 0);
 }
 
 /* Map btnum to buffer ops for the types that need it. */
@@ -827,251 +557,211 @@ btnum_to_ops(
 	}
 }
 
+/*
+ * Free Space Btrees
+ *
+ * We need to leave some free records in the tree for the corner case of
+ * setting up the AGFL. This may require allocation of blocks, and as
+ * such can require insertion of new records into the tree (e.g. moving
+ * a record in the by-count tree when a long extent is shortened). If we
+ * pack the records into the leaves with no slack space, this requires a
+ * leaf split to occur and a block to be allocated from the free list.
+ * If we don't have any blocks on the free list (because we are setting
+ * it up!), then we fail, and the filesystem will fail with the same
+ * failure at runtime. Hence leave a couple of records slack space in
+ * each block to allow immediate modification of the tree without
+ * requiring splits to be done.
+ */
+
 static void
-prop_freespace_cursor(xfs_mount_t *mp, xfs_agnumber_t agno,
-		bt_status_t *btree_curs, xfs_agblock_t startblock,
-		xfs_extlen_t blockcount, int level, xfs_btnum_t btnum)
+init_freespace_cursors(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	unsigned int		free_space,
+	unsigned int		*nr_extents,
+	int			*extra_blocks,
+	struct bt_rebuild	*btr_bno,
+	struct bt_rebuild	*btr_cnt)
 {
-	struct xfs_btree_block	*bt_hdr;
-	xfs_alloc_key_t		*bt_key;
-	xfs_alloc_ptr_t		*bt_ptr;
-	xfs_agblock_t		agbno;
-	bt_stat_level_t		*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
-
-	ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
-
-	level++;
-
-	if (level >= btree_curs->num_levels)
-		return;
-
-	lptr = &btree_curs->level[level];
-	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
-		/*
-		 * only happens once when initializing the
-		 * left-hand side of the tree.
-		 */
-		prop_freespace_cursor(mp, agno, btree_curs, startblock,
-				blockcount, level, btnum);
-	}
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
-				lptr->num_recs_pb + (lptr->modulo > 0))  {
-		/*
-		 * write out current prev block, grab us a new block,
-		 * and set the rightsib pointer of current block
-		 */
-#ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, " %d ", lptr->prev_agbno);
-#endif
-		if (lptr->prev_agbno != NULLAGBLOCK) {
-			ASSERT(lptr->prev_buf_p != NULL);
-			libxfs_writebuf(lptr->prev_buf_p, 0);
-		}
-		lptr->prev_agbno = lptr->agbno;;
-		lptr->prev_buf_p = lptr->buf_p;
-		agbno = get_next_blockaddr(agno, level, btree_curs);
+	struct xfs_btree_cur	*cur;
+	unsigned int		bno_blocks;
+	unsigned int		cnt_blocks;
+	int			error;
 
-		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
+	init_rebuild(sc, &XFS_RMAP_OINFO_AG, free_space, btr_bno);
+	init_rebuild(sc, &XFS_RMAP_OINFO_AG, free_space, btr_cnt);
 
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		lptr->agbno = agbno;
+	/*
+	 * Now we need to allocate blocks for the free space btrees using the
+	 * free space records we're about to put in them.  Every record we use
+	 * can change the shape of the free space trees, so we recompute the
+	 * btree shape until we stop needing /more/ blocks.  If we have any
+	 * left over we'll stash them in the AGFL when we're done.
+	 */
+	do {
+		unsigned int	num_freeblocks;
 
-		if (lptr->modulo)
-			lptr->modulo--;
+		bno_blocks = btr_bno->bload.nr_blocks;
+		cnt_blocks = btr_cnt->bload.nr_blocks;
 
-		/*
-		 * initialize block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum, level,
-					0, agno);
+		/* Compute how many bnobt blocks we'll need. */
+		cur = libxfs_allocbt_stage_cursor(sc->mp, sc->tp,
+				&btr_bno->newbt.afake, agno, XFS_BTNUM_BNO);
+		error = -libxfs_btree_bload_compute_geometry(cur,
+				&btr_bno->bload, *nr_extents);
+		if (error)
+			do_error(
+_("Unable to compute free space by block btree geometry, error %d.\n"), -error);
+		libxfs_btree_del_cursor(cur, error);
+
+		/* Compute how many cntbt blocks we'll need. */
+		cur = libxfs_allocbt_stage_cursor(sc->mp, sc->tp,
+				&btr_cnt->newbt.afake, agno, XFS_BTNUM_CNT);
+		error = -libxfs_btree_bload_compute_geometry(cur,
+				&btr_cnt->bload, *nr_extents);
+		if (error)
+			do_error(
+_("Unable to compute free space by length btree geometry, error %d.\n"), -error);
+		libxfs_btree_del_cursor(cur, error);
 
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
+		/* We don't need any more blocks, so we're done. */
+		if (bno_blocks >= btr_bno->bload.nr_blocks &&
+		    cnt_blocks >= btr_cnt->bload.nr_blocks)
+			break;
 
-		/*
-		 * propagate extent record for first extent in new block up
-		 */
-		prop_freespace_cursor(mp, agno, btree_curs, startblock,
-				blockcount, level, btnum);
-	}
-	/*
-	 * add extent info to current block
-	 */
-	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
+		/* Allocate however many more blocks we need this time. */
+		if (bno_blocks < btr_bno->bload.nr_blocks)
+			setup_rebuild(sc->mp, agno, btr_bno,
+					btr_bno->bload.nr_blocks - bno_blocks);
+		if (cnt_blocks < btr_cnt->bload.nr_blocks)
+			setup_rebuild(sc->mp, agno, btr_cnt,
+					btr_cnt->bload.nr_blocks - cnt_blocks);
 
-	bt_key = XFS_ALLOC_KEY_ADDR(mp, bt_hdr,
-				be16_to_cpu(bt_hdr->bb_numrecs));
-	bt_ptr = XFS_ALLOC_PTR_ADDR(mp, bt_hdr,
-				be16_to_cpu(bt_hdr->bb_numrecs),
-				mp->m_alloc_mxr[1]);
+		/* Ok, now how many free space records do we have? */
+		*nr_extents = count_bno_extents_blocks(agno, &num_freeblocks);
+	} while (1);
 
-	bt_key->ar_startblock = cpu_to_be32(startblock);
-	bt_key->ar_blockcount = cpu_to_be32(blockcount);
-	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
+	*extra_blocks = (bno_blocks - btr_bno->bload.nr_blocks) +
+			(cnt_blocks - btr_cnt->bload.nr_blocks);
 }
 
-/*
- * rebuilds a freespace tree given a cursor and type
- * of tree to build (bno or bcnt).  returns the number of free blocks
- * represented by the tree.
- */
-static xfs_extlen_t
-build_freespace_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
-		bt_status_t *btree_curs, xfs_btnum_t btnum)
+static void
+get_freesp_data(
+	struct xfs_btree_cur		*cur,
+	struct extent_tree_node		*bno_rec,
+	xfs_agblock_t			*freeblks)
 {
-	xfs_agnumber_t		i;
-	xfs_agblock_t		j;
-	struct xfs_btree_block	*bt_hdr;
-	xfs_alloc_rec_t		*bt_rec;
-	int			level;
-	xfs_agblock_t		agbno;
-	extent_tree_node_t	*ext_ptr;
-	bt_stat_level_t		*lptr;
-	xfs_extlen_t		freeblks;
-	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
-
-	ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
+	struct xfs_alloc_rec_incore	*arec = &cur->bc_rec.a;
 
-#ifdef XR_BLD_FREE_TRACE
-	fprintf(stderr, "in build_freespace_tree, agno = %d\n", agno);
-#endif
-	level = btree_curs->num_levels;
-	freeblks = 0;
+	arec->ar_startblock = bno_rec->ex_startblock;
+	arec->ar_blockcount = bno_rec->ex_blockcount;
+	if (freeblks)
+		*freeblks += bno_rec->ex_blockcount;
+}
 
-	ASSERT(level > 0);
+/* Grab one bnobt record. */
+static int
+get_bnobt_data(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
+{
+	struct bt_rebuild		*btr = priv;
 
-	/*
-	 * initialize the first block on each btree level
-	 */
-	for (i = 0; i < level; i++)  {
-		lptr = &btree_curs->level[i];
+	get_freesp_data(cur, btr->bno_rec, btr->freeblks);
+	btr->bno_rec = findnext_bno_extent(btr->bno_rec);
+	return 0;
+}
 
-		agbno = get_next_blockaddr(agno, i, btree_curs);
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
+/* Rebuild a free space by block number btree. */
+static void
+build_bnobt(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr_bno,
+	xfs_agblock_t		*freeblks)
+{
+	struct xfs_btree_cur	*cur;
+	int			error;
 
-		if (i == btree_curs->num_levels - 1)
-			btree_curs->root = agbno;
+	*freeblks = 0;
+	btr_bno->bload.get_data = get_bnobt_data;
+	btr_bno->bload.alloc_block = rebuild_alloc_block;
+	btr_bno->bno_rec = findfirst_bno_extent(agno);
+	btr_bno->freeblks = freeblks;
 
-		lptr->agbno = agbno;
-		lptr->prev_agbno = NULLAGBLOCK;
-		lptr->prev_buf_p = NULL;
-		/*
-		 * initialize block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum, i, 0, agno);
-	}
-	/*
-	 * run along leaf, setting up records.  as we have to switch
-	 * blocks, call the prop_freespace_cursor routine to set up the new
-	 * pointers for the parent.  that can recurse up to the root
-	 * if required.  set the sibling pointers for leaf level here.
-	 */
-	if (btnum == XFS_BTNUM_BNO)
-		ext_ptr = findfirst_bno_extent(agno);
-	else
-		ext_ptr = findfirst_bcnt_extent(agno);
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
+	if (error)
+		do_error(
+_("Insufficient memory to construct bnobt rebuild transaction.\n"));
 
-#ifdef XR_BLD_FREE_TRACE
-	fprintf(stderr, "bft, agno = %d, start = %u, count = %u\n",
-		agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
-#endif
+	/* Add all observed bnobt records. */
+	cur = libxfs_allocbt_stage_cursor(sc->mp, sc->tp,
+			&btr_bno->newbt.afake, agno, XFS_BTNUM_BNO);
+	error = -libxfs_btree_bload(cur, &btr_bno->bload, btr_bno);
+	if (error)
+		do_error(
+_("Error %d while creating bnobt btree for AG %u.\n"), error, agno);
 
-	lptr = &btree_curs->level[0];
+	/* Since we're not writing the AGF yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(cur, 0);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing bnobt btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
+}
 
-	for (i = 0; i < btree_curs->level[0].num_blocks; i++)  {
-		/*
-		 * block initialization, lay in block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum, 0, 0, agno);
+/* Grab one cntbt record. */
+static int
+get_cntbt_data(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
+{
+	struct bt_rebuild		*btr = priv;
 
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-		bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
-							(lptr->modulo > 0));
-#ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, "bft, bb_numrecs = %d\n",
-				be16_to_cpu(bt_hdr->bb_numrecs));
-#endif
+	get_freesp_data(cur, btr->bno_rec, btr->freeblks);
+	btr->bno_rec = findnext_bcnt_extent(cur->bc_private.a.agno,
+			btr->bno_rec);
+	return 0;
+}
 
-		if (lptr->modulo > 0)
-			lptr->modulo--;
+/* Rebuild a freespace by count btree. */
+static void
+build_cntbt(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr_cnt,
+	xfs_agblock_t		*freeblks)
+{
+	struct xfs_btree_cur	*cur;
+	int			error;
 
-		/*
-		 * initialize values in the path up to the root if
-		 * this is a multi-level btree
-		 */
-		if (btree_curs->num_levels > 1)
-			prop_freespace_cursor(mp, agno, btree_curs,
-					ext_ptr->ex_startblock,
-					ext_ptr->ex_blockcount,
-					0, btnum);
-
-		bt_rec = (xfs_alloc_rec_t *)
-			  ((char *)bt_hdr + XFS_ALLOC_BLOCK_LEN(mp));
-		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
-			ASSERT(ext_ptr != NULL);
-			bt_rec[j].ar_startblock = cpu_to_be32(
-							ext_ptr->ex_startblock);
-			bt_rec[j].ar_blockcount = cpu_to_be32(
-							ext_ptr->ex_blockcount);
-			freeblks += ext_ptr->ex_blockcount;
-			if (btnum == XFS_BTNUM_BNO)
-				ext_ptr = findnext_bno_extent(ext_ptr);
-			else
-				ext_ptr = findnext_bcnt_extent(agno, ext_ptr);
-#if 0
-#ifdef XR_BLD_FREE_TRACE
-			if (ext_ptr == NULL)
-				fprintf(stderr, "null extent pointer, j = %d\n",
-					j);
-			else
-				fprintf(stderr,
-				"bft, agno = %d, start = %u, count = %u\n",
-					agno, ext_ptr->ex_startblock,
-					ext_ptr->ex_blockcount);
-#endif
-#endif
-		}
+	*freeblks = 0;
+	btr_cnt->bload.get_data = get_cntbt_data;
+	btr_cnt->bload.alloc_block = rebuild_alloc_block;
+	btr_cnt->bno_rec = findfirst_bcnt_extent(agno);
+	btr_cnt->freeblks = freeblks;
 
-		if (ext_ptr != NULL)  {
-			/*
-			 * get next leaf level block
-			 */
-			if (lptr->prev_buf_p != NULL)  {
-#ifdef XR_BLD_FREE_TRACE
-				fprintf(stderr, " writing fst agbno %u\n",
-					lptr->prev_agbno);
-#endif
-				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
-				libxfs_writebuf(lptr->prev_buf_p, 0);
-			}
-			lptr->prev_buf_p = lptr->buf_p;
-			lptr->prev_agbno = lptr->agbno;
-			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
-			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
+	if (error)
+		do_error(
+_("Insufficient memory to construct cntbt rebuild transaction.\n"));
 
-			lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		}
-	}
+	/* Add all observed cntbt records. */
+	cur = libxfs_allocbt_stage_cursor(sc->mp, sc->tp,
+			&btr_cnt->newbt.afake, agno, XFS_BTNUM_CNT);
+	error = -libxfs_btree_bload(cur, &btr_cnt->bload, btr_cnt);
+	if (error)
+		do_error(
+_("Error %d while creating cntbt btree for AG %u.\n"), error, agno);
 
-	return(freeblks);
+	/* Since we're not writing the AGF yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(cur, 0);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing cntbt btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
 }
 
 /*
@@ -2157,6 +1847,27 @@ _("Insufficient memory to construct refcount cursor."));
 	free_slab_cursor(&refc_cur);
 }
 
+/* Fill the AGFL with any leftover bnobt rebuilder blocks. */
+static void
+fill_agfl(
+	struct bt_rebuild	*btr,
+	__be32			*agfl_bnos,
+	int			*i)
+{
+	struct xrep_newbt_resv	*resv, *n;
+	struct xfs_mount	*mp = btr->newbt.sc->mp;
+
+	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
+		xfs_agblock_t	bno;
+
+		bno = XFS_FSB_TO_AGBNO(mp, resv->fsbno + resv->used);
+		while (resv->used < resv->len && (*i) < libxfs_agfl_size(mp)) {
+			agfl_bnos[(*i)++] = cpu_to_be32(bno++);
+			resv->used++;
+		}
+	}
+}
+
 /*
  * build both the agf and the agfl for an agno given both
  * btree cursors.
@@ -2167,8 +1878,8 @@ static void
 build_agf_agfl(
 	struct xfs_mount	*mp,
 	xfs_agnumber_t		agno,
-	struct bt_status	*bno_bt,
-	struct bt_status	*bcnt_bt,
+	struct bt_rebuild	*btr_bno,
+	struct bt_rebuild	*btr_cnt,
 	xfs_extlen_t		freeblks,	/* # free blocks in tree */
 	int			lostblocks,	/* # blocks that will be lost */
 	struct bt_status	*rmap_bt,
@@ -2180,9 +1891,7 @@ build_agf_agfl(
 	int			i;
 	struct xfs_agfl		*agfl;
 	struct xfs_agf		*agf;
-	xfs_fsblock_t		fsb;
 	__be32			*freelist;
-	int			error;
 
 	agf_buf = libxfs_getbuf(mp->m_dev,
 			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
@@ -2209,10 +1918,14 @@ build_agf_agfl(
 		agf->agf_length = cpu_to_be32(mp->m_sb.sb_dblocks -
 			(xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno);
 
-	agf->agf_roots[XFS_BTNUM_BNO] = cpu_to_be32(bno_bt->root);
-	agf->agf_levels[XFS_BTNUM_BNO] = cpu_to_be32(bno_bt->num_levels);
-	agf->agf_roots[XFS_BTNUM_CNT] = cpu_to_be32(bcnt_bt->root);
-	agf->agf_levels[XFS_BTNUM_CNT] = cpu_to_be32(bcnt_bt->num_levels);
+	agf->agf_roots[XFS_BTNUM_BNO] =
+			cpu_to_be32(btr_bno->newbt.afake.af_root);
+	agf->agf_levels[XFS_BTNUM_BNO] =
+			cpu_to_be32(btr_bno->newbt.afake.af_levels);
+	agf->agf_roots[XFS_BTNUM_CNT] =
+			cpu_to_be32(btr_cnt->newbt.afake.af_root);
+	agf->agf_levels[XFS_BTNUM_CNT] =
+			cpu_to_be32(btr_cnt->newbt.afake.af_levels);
 	agf->agf_roots[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->root);
 	agf->agf_levels[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->num_levels);
 	agf->agf_freeblks = cpu_to_be32(freeblks);
@@ -2232,9 +1945,8 @@ build_agf_agfl(
 		 * Don't count the root blocks as they are already
 		 * accounted for.
 		 */
-		blks = (bno_bt->num_tot_blocks - bno_bt->num_free_blocks) +
-			(bcnt_bt->num_tot_blocks - bcnt_bt->num_free_blocks) -
-			2;
+		blks = btr_bno->newbt.afake.af_blocks +
+			btr_cnt->newbt.afake.af_blocks - 2;
 		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 			blks += rmap_bt->num_tot_blocks - rmap_bt->num_free_blocks - 1;
 		agf->agf_btreeblks = cpu_to_be32(blks);
@@ -2272,49 +1984,14 @@ build_agf_agfl(
 			agfl->agfl_bno[i] = cpu_to_be32(NULLAGBLOCK);
 	}
 	freelist = XFS_BUF_TO_AGFL_BNO(mp, agfl_buf);
+	i = 0;
 
-	/*
-	 * do we have left-over blocks in the btree cursors that should
-	 * be used to fill the AGFL?
-	 */
-	if (bno_bt->num_free_blocks > 0 || bcnt_bt->num_free_blocks > 0)  {
-		/*
-		 * yes, now grab as many blocks as we can
-		 */
-		i = 0;
-		while (bno_bt->num_free_blocks > 0 && i < libxfs_agfl_size(mp))
-		{
-			freelist[i] = cpu_to_be32(
-					get_next_blockaddr(agno, 0, bno_bt));
-			i++;
-		}
-
-		while (bcnt_bt->num_free_blocks > 0 && i < libxfs_agfl_size(mp))
-		{
-			freelist[i] = cpu_to_be32(
-					get_next_blockaddr(agno, 0, bcnt_bt));
-			i++;
-		}
-		/*
-		 * now throw the rest of the blocks away and complain
-		 */
-		while (bno_bt->num_free_blocks > 0) {
-			fsb = XFS_AGB_TO_FSB(mp, agno,
-					get_next_blockaddr(agno, 0, bno_bt));
-			error = slab_add(lost_fsb, &fsb);
-			if (error)
-				do_error(
-_("Insufficient memory saving lost blocks.\n"));
-		}
-		while (bcnt_bt->num_free_blocks > 0) {
-			fsb = XFS_AGB_TO_FSB(mp, agno,
-					get_next_blockaddr(agno, 0, bcnt_bt));
-			error = slab_add(lost_fsb, &fsb);
-			if (error)
-				do_error(
-_("Insufficient memory saving lost blocks.\n"));
-		}
+	/* Fill the AGFL with leftover blocks or save them for later. */
+	fill_agfl(btr_bno, freelist, &i);
+	fill_agfl(btr_cnt, freelist, &i);
 
+	/* Set the AGF counters for the AGFL. */
+	if (i > 0) {
 		agf->agf_flfirst = 0;
 		agf->agf_fllast = cpu_to_be32(i - 1);
 		agf->agf_flcount = cpu_to_be32(i);
@@ -2409,8 +2086,8 @@ phase5_func(
 	uint64_t		num_free_inos;
 	uint64_t		finobt_num_inos;
 	uint64_t		finobt_num_free_inos;
-	bt_status_t		bno_btree_curs;
-	bt_status_t		bcnt_btree_curs;
+	struct bt_rebuild	btr_bno;
+	struct bt_rebuild	btr_cnt;
 	bt_status_t		ino_btree_curs;
 	bt_status_t		fino_btree_curs;
 	bt_status_t		rmap_btree_curs;
@@ -2418,9 +2095,7 @@ phase5_func(
 	int			extra_blocks = 0;
 	uint			num_freeblocks;
 	xfs_extlen_t		freeblks1;
-#ifdef DEBUG
 	xfs_extlen_t		freeblks2;
-#endif
 	xfs_agblock_t		num_extents;
 
 	if (verbose)
@@ -2429,7 +2104,7 @@ phase5_func(
 	/*
 	 * build up incore bno and bcnt extent btrees
 	 */
-	num_extents = mk_incore_fstree(mp, agno);
+	num_extents = mk_incore_fstree(mp, agno, &num_freeblocks);
 
 #ifdef XR_BLD_FREE_TRACE
 	fprintf(stderr, "# of bno extents is %d\n",
@@ -2508,8 +2183,8 @@ phase5_func(
 	/*
 	 * track blocks that we might really lose
 	 */
-	extra_blocks = calculate_freespace_cursor(mp, agno,
-				&num_extents, &bno_btree_curs);
+	init_freespace_cursors(&sc, agno, num_freeblocks, &num_extents,
+			&extra_blocks, &btr_bno, &btr_cnt);
 
 	/*
 	 * freespace btrees live in the "free space" but
@@ -2527,13 +2202,6 @@ phase5_func(
 	if (extra_blocks > 0)
 		sb_fdblocks_ag[agno] -= extra_blocks;
 
-	bcnt_btree_curs = bno_btree_curs;
-
-	bno_btree_curs.owner = XFS_RMAP_OWN_AG;
-	bcnt_btree_curs.owner = XFS_RMAP_OWN_AG;
-	setup_cursor(mp, agno, &bno_btree_curs);
-	setup_cursor(mp, agno, &bcnt_btree_curs);
-
 #ifdef XR_BLD_FREE_TRACE
 	fprintf(stderr, "# of bno extents is %d\n",
 			count_bno_extents(agno));
@@ -2541,25 +2209,13 @@ phase5_func(
 			count_bcnt_extents(agno));
 #endif
 
-	/*
-	 * now rebuild the freespace trees
-	 */
-	freeblks1 = build_freespace_tree(mp, agno,
-				&bno_btree_curs, XFS_BTNUM_BNO);
+	/* Rebuild the freespace btrees. */
+	build_bnobt(&sc, agno, &btr_bno, &freeblks1);
+	build_cntbt(&sc, agno, &btr_cnt, &freeblks2);
+
 #ifdef XR_BLD_FREE_TRACE
-	fprintf(stderr, "# of free blocks == %d\n", freeblks1);
-#endif
-	write_cursor(&bno_btree_curs);
-
-#ifdef DEBUG
-	freeblks2 = build_freespace_tree(mp, agno,
-				&bcnt_btree_curs, XFS_BTNUM_CNT);
-#else
-	(void) build_freespace_tree(mp, agno,
-				&bcnt_btree_curs, XFS_BTNUM_CNT);
+	fprintf(stderr, "# of free blocks == %d/%d\n", freeblks1, freeblks2);
 #endif
-	write_cursor(&bcnt_btree_curs);
-
 	ASSERT(freeblks1 == freeblks2);
 
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
@@ -2577,9 +2233,9 @@ phase5_func(
 	/*
 	 * set up agf and agfl
 	 */
-	build_agf_agfl(mp, agno, &bno_btree_curs,
-			&bcnt_btree_curs, freeblks1, extra_blocks,
+	build_agf_agfl(mp, agno, &btr_bno, &btr_cnt, freeblks1, extra_blocks,
 			&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
+
 	/*
 	 * build inode allocation tree.
 	 */
@@ -2603,15 +2259,14 @@ phase5_func(
 	/*
 	 * tear down cursors
 	 */
-	finish_cursor(&bno_btree_curs);
-	finish_cursor(&ino_btree_curs);
+	finish_rebuild(mp, &btr_bno, lost_fsb);
+	finish_rebuild(mp, &btr_cnt, lost_fsb);
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 		finish_cursor(&rmap_btree_curs);
 	if (xfs_sb_version_hasreflink(&mp->m_sb))
 		finish_cursor(&refcnt_btree_curs);
 	if (xfs_sb_version_hasfinobt(&mp->m_sb))
 		finish_cursor(&fino_btree_curs);
-	finish_cursor(&bcnt_btree_curs);
 
 	/*
 	 * release the incore per-AG bno/bcnt trees so


^ permalink raw reply related	[flat|nested] 25+ messages in thread
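
The rebuild helpers in the patch above all follow the same callback
contract: libxfs_btree_bload() owns the iteration and calls back into a
get_data function for one record at a time, while the caller tracks its
position in a private context.  Below is a minimal standalone sketch of
that contract; every type and function in it is a hypothetical stand-in
for illustration, not a libxfs API.

/*
 * The loader drives the iteration; the callback copies one record and
 * advances the caller's private cursor, mirroring how get_bnobt_data()
 * walks the incore extent list.
 */
#include <stdio.h>

struct extent {
	unsigned int	startblock;
	unsigned int	blockcount;
	struct extent	*next;
};

struct load_ctx {
	struct extent	*pos;		/* next record to hand out */
	unsigned int	freeblks;	/* running total, like btr->freeblks */
};

/* Mirrors get_bnobt_data(): copy one record, then advance. */
static int
get_record(
	struct extent	*out,
	void		*priv)
{
	struct load_ctx	*ctx = priv;

	if (!ctx->pos)
		return 1;	/* no more records */
	*out = *ctx->pos;
	ctx->freeblks += ctx->pos->blockcount;
	ctx->pos = ctx->pos->next;
	return 0;
}

/* Stand-in for libxfs_btree_bload(): the loader pulls the records. */
static void
bulk_load(
	int		(*get)(struct extent *, void *),
	void		*priv)
{
	struct extent	rec;

	while (get(&rec, priv) == 0)
		printf("loaded [%u, %u]\n", rec.startblock, rec.blockcount);
}

int
main(void)
{
	struct extent	e2 = { 100, 8, NULL };
	struct extent	e1 = { 10, 4, &e2 };
	struct load_ctx	ctx = { .pos = &e1 };

	bulk_load(get_record, &ctx);
	printf("freeblks = %u\n", ctx.freeblks);
	return 0;
}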

* [PATCH 5/9] xfs_repair: rebuild inode btrees with bulk loader
  2020-01-01  1:21 [PATCH v2 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
                   ` (3 preceding siblings ...)
  2020-01-01  1:21 ` [PATCH 4/9] xfs_repair: rebuild free space btrees with bulk loader Darrick J. Wong
@ 2020-01-01  1:21 ` Darrick J. Wong
  2020-01-01  1:22 ` [PATCH 6/9] xfs_repair: rebuild reverse mapping " Darrick J. Wong
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 25+ messages in thread
From: Darrick J. Wong @ 2020-01-01  1:21 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use the btree bulk loading functions to rebuild the inode btrees
and drop the open-coded implementation.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/libxfs_api_defs.h |    1 
 repair/phase5.c          |  607 +++++++++++++++++-----------------------------
 2 files changed, 227 insertions(+), 381 deletions(-)


diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 60dc9297..468503c6 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -181,5 +181,6 @@
 #define xfs_btree_bload_compute_geometry libxfs_btree_bload_compute_geometry
 #define xfs_btree_bload			libxfs_btree_bload
 #define xfs_allocbt_stage_cursor	libxfs_allocbt_stage_cursor
+#define xfs_inobt_stage_cursor		libxfs_inobt_stage_cursor
 
 #endif /* __LIBXFS_API_DEFS_H__ */
diff --git a/repair/phase5.c b/repair/phase5.c
index 2421c4bc..1285527a 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -75,6 +75,10 @@ struct bt_rebuild {
 			struct extent_tree_node	*bno_rec;
 			xfs_agblock_t		*freeblks;
 		};
+		struct {
+			struct ino_tree_node	*ino_rec;
+			struct agi_stat		*agi_stat;
+		};
 	};
 };
 
@@ -764,48 +768,40 @@ _("Error %d while writing cntbt btree for AG %u.\n"), error, agno);
 	sc->tp = NULL;
 }
 
-/*
- * XXX(hch): any reason we don't just look at mp->m_inobt_mxr?
- */
-#define XR_INOBT_BLOCK_MAXRECS(mp, level) \
-			libxfs_inobt_maxrecs((mp), (mp)->m_sb.sb_blocksize, \
-						(level) == 0)
+/* Inode Btrees */
 
-/*
- * we don't have to worry here about how chewing up free extents
- * may perturb things because inode tree building happens before
- * freespace tree building.
- */
+/* Initialize both inode btree cursors as needed. */
 static void
-init_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
-		uint64_t *num_inos, uint64_t *num_free_inos, int finobt)
+init_ino_cursors(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	unsigned int		free_space,
+	uint64_t		*num_inos,
+	uint64_t		*num_free_inos,
+	struct bt_rebuild	*btr_ino,
+	struct bt_rebuild	*btr_fino)
 {
-	uint64_t		ninos;
-	uint64_t		nfinos;
-	int			rec_nfinos;
-	int			rec_ninos;
-	ino_tree_node_t		*ino_rec;
-	int			num_recs;
-	int			level;
-	bt_stat_level_t		*lptr;
-	bt_stat_level_t		*p_lptr;
-	xfs_extlen_t		blocks_allocated;
-	int			i;
+	struct xfs_btree_cur	*cur;
+	struct ino_tree_node	*ino_rec;
+	unsigned int		ino_recs = 0;
+	unsigned int		fino_recs = 0;
+	bool			finobt;
+	int			error;
 
-	*num_inos = *num_free_inos = 0;
-	ninos = nfinos = 0;
+	finobt = xfs_sb_version_hasfinobt(&sc->mp->m_sb);
+	init_rebuild(sc, &XFS_RMAP_OINFO_INOBT, free_space, btr_ino);
+	init_rebuild(sc, &XFS_RMAP_OINFO_INOBT, free_space, btr_fino);
 
-	lptr = &btree_curs->level[0];
-	btree_curs->init = 1;
-	btree_curs->owner = XFS_RMAP_OWN_INOBT;
+	/* Compute inode statistics. */
+	*num_free_inos = 0;
+	*num_inos = 0;
+	for (ino_rec = findfirst_inode_rec(agno);
+	     ino_rec != NULL;
+	     ino_rec = next_ino_rec(ino_rec))  {
+		unsigned int	rec_ninos = 0;
+		unsigned int	rec_nfinos = 0;
+		int		i;
 
-	/*
-	 * build up statistics
-	 */
-	ino_rec = findfirst_inode_rec(agno);
-	for (num_recs = 0; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec))  {
-		rec_ninos = 0;
-		rec_nfinos = 0;
 		for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
 			ASSERT(is_inode_confirmed(ino_rec, i));
 			/*
@@ -819,168 +815,222 @@ init_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
 			rec_ninos++;
 		}
 
-		/*
-		 * finobt only considers records with free inodes
-		 */
-		if (finobt && !rec_nfinos)
-			continue;
+		*num_free_inos += rec_nfinos;
+		*num_inos += rec_ninos;
+		ino_recs++;
 
-		nfinos += rec_nfinos;
-		ninos += rec_ninos;
-		num_recs++;
+		/* finobt only considers records with free inodes */
+		if (rec_nfinos)
+			fino_recs++;
 	}
 
-	if (num_recs == 0) {
-		/*
-		 * easy corner-case -- no inode records
-		 */
-		lptr->num_blocks = 1;
-		lptr->modulo = 0;
-		lptr->num_recs_pb = 0;
-		lptr->num_recs_tot = 0;
-
-		btree_curs->num_levels = 1;
-		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
+	/* Compute how many inobt blocks we'll need. */
+	cur = libxfs_inobt_stage_cursor(sc->mp, sc->tp,
+			&btr_ino->newbt.afake, agno, XFS_BTNUM_INO);
+	error = -libxfs_btree_bload_compute_geometry(cur, &btr_ino->bload,
+			ino_recs);
+	if (error)
+		do_error(
+_("Unable to compute inode btree geometry, error %d.\n"), error);
+	libxfs_btree_del_cursor(cur, error);
 
-		setup_cursor(mp, agno, btree_curs);
+	setup_rebuild(sc->mp, agno, btr_ino, btr_ino->bload.nr_blocks);
 
+	if (!finobt)
 		return;
-	}
 
-	blocks_allocated = lptr->num_blocks = howmany(num_recs,
-					XR_INOBT_BLOCK_MAXRECS(mp, 0));
+	/* Compute how many finobt blocks we'll need. */
+	cur = libxfs_inobt_stage_cursor(sc->mp, sc->tp,
+			&btr_fino->newbt.afake, agno, XFS_BTNUM_FINO);
+	error = -libxfs_btree_bload_compute_geometry(cur, &btr_fino->bload,
+			fino_recs);
+	if (error)
+		do_error(
+_("Unable to compute free inode btree geometry, error %d.\n"), error);
+	libxfs_btree_del_cursor(cur, error);
 
-	lptr->modulo = num_recs % lptr->num_blocks;
-	lptr->num_recs_pb = num_recs / lptr->num_blocks;
-	lptr->num_recs_tot = num_recs;
-	level = 1;
+	setup_rebuild(sc->mp, agno, btr_fino, btr_fino->bload.nr_blocks);
+}
 
-	if (lptr->num_blocks > 1)  {
-		for (; btree_curs->level[level-1].num_blocks > 1
-				&& level < XFS_BTREE_MAXLEVELS;
-				level++)  {
-			lptr = &btree_curs->level[level];
-			p_lptr = &btree_curs->level[level - 1];
-			lptr->num_blocks = howmany(p_lptr->num_blocks,
-				XR_INOBT_BLOCK_MAXRECS(mp, level));
-			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
-			lptr->num_recs_pb = p_lptr->num_blocks
-					/ lptr->num_blocks;
-			lptr->num_recs_tot = p_lptr->num_blocks;
+/* Copy one incore inode record into the inobt cursor. */
+static void
+get_inode_data(
+	struct xfs_btree_cur		*cur,
+	struct ino_tree_node		*ino_rec,
+	struct agi_stat			*agi_stat)
+{
+	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
+	int				inocnt = 0;
+	int				finocnt = 0;
+	int				k;
 
-			blocks_allocated += lptr->num_blocks;
-		}
+	irec->ir_startino = ino_rec->ino_startnum;
+	irec->ir_free = ino_rec->ir_free;
+
+	for (k = 0; k < sizeof(xfs_inofree_t) * NBBY; k++)  {
+		ASSERT(is_inode_confirmed(ino_rec, k));
+
+		if (is_inode_sparse(ino_rec, k))
+			continue;
+		if (is_inode_free(ino_rec, k))
+			finocnt++;
+		inocnt++;
 	}
-	ASSERT(lptr->num_blocks == 1);
-	btree_curs->num_levels = level;
 
-	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
-			= blocks_allocated;
+	irec->ir_count = inocnt;
+	irec->ir_freecount = finocnt;
 
-	setup_cursor(mp, agno, btree_curs);
+	if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+		uint64_t		sparse;
+		int			spmask;
+		uint16_t		holemask;
+
+		/*
+		 * Convert the 64-bit in-core sparse inode state to the
+		 * 16-bit on-disk holemask.
+		 */
+		holemask = 0;
+		spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
+		sparse = ino_rec->ir_sparse;
+		for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
+			if (sparse & spmask) {
+				ASSERT((sparse & spmask) == spmask);
+				holemask |= (1 << k);
+			} else
+				ASSERT((sparse & spmask) == 0);
+			sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
+		}
 
-	*num_inos = ninos;
-	*num_free_inos = nfinos;
+		irec->ir_holemask = holemask;
+	} else {
+		irec->ir_holemask = 0;
+	}
 
-	return;
+	if (!agi_stat)
+		return;
+
+	if (agi_stat->first_agino == NULLAGINO)
+		agi_stat->first_agino = ino_rec->ino_startnum;
+	agi_stat->freecount += finocnt;
+	agi_stat->count += inocnt;
 }
 
-static void
-prop_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
-	xfs_btnum_t btnum, xfs_agino_t startino, int level)
+/* Grab one inobt record. */
+static int
+get_inobt_data(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
 {
-	struct xfs_btree_block	*bt_hdr;
-	xfs_inobt_key_t		*bt_key;
-	xfs_inobt_ptr_t		*bt_ptr;
-	xfs_agblock_t		agbno;
-	bt_stat_level_t		*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
+	struct bt_rebuild		*rebuild = priv;
 
-	level++;
+	get_inode_data(cur, rebuild->ino_rec, rebuild->agi_stat);
+	rebuild->ino_rec = next_ino_rec(rebuild->ino_rec);
+	return 0;
+}
 
-	if (level >= btree_curs->num_levels)
-		return;
+/* Rebuild an inobt btree. */
+static void
+build_inobt(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr_ino,
+	struct agi_stat		*agi_stat)
+{
+	struct xfs_btree_cur	*cur;
+	int			error;
 
-	lptr = &btree_curs->level[level];
-	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+	btr_ino->bload.get_data = get_inobt_data;
+	btr_ino->bload.alloc_block = rebuild_alloc_block;
+	agi_stat->count = agi_stat->freecount = 0;
+	agi_stat->first_agino = NULLAGINO;
+	btr_ino->agi_stat = agi_stat;
+	btr_ino->ino_rec = findfirst_inode_rec(agno);
 
-	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
-		/*
-		 * this only happens once to initialize the
-		 * first path up the left side of the tree
-		 * where the agbno's are already set up
-		 */
-		prop_ino_cursor(mp, agno, btree_curs, btnum, startino, level);
-	}
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
+	if (error)
+		do_error(
+_("Insufficient memory to construct inobt rebuild transaction.\n"));
 
-	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
-				lptr->num_recs_pb + (lptr->modulo > 0))  {
-		/*
-		 * write out current prev block, grab us a new block,
-		 * and set the rightsib pointer of current block
-		 */
-#ifdef XR_BLD_INO_TRACE
-		fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
-#endif
-		if (lptr->prev_agbno != NULLAGBLOCK)  {
-			ASSERT(lptr->prev_buf_p != NULL);
-			libxfs_writebuf(lptr->prev_buf_p, 0);
-		}
-		lptr->prev_agbno = lptr->agbno;;
-		lptr->prev_buf_p = lptr->buf_p;
-		agbno = get_next_blockaddr(agno, level, btree_curs);
+	/* Add all observed inobt records. */
+	cur = libxfs_inobt_stage_cursor(sc->mp, sc->tp,
+			&btr_ino->newbt.afake, agno, XFS_BTNUM_INO);
+	error = -libxfs_btree_bload(cur, &btr_ino->bload, btr_ino);
+	if (error)
+		do_error(
+_("Error %d while creating inobt btree for AG %u.\n"), error, agno);
 
-		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
+	/* Since we're not writing the AGI yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(cur, 0);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing inobt btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
+}
 
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		lptr->agbno = agbno;
+/* Grab one finobt record. */
+static int
+get_finobt_data(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
+{
+	struct bt_rebuild		*rebuild = priv;
 
-		if (lptr->modulo)
-			lptr->modulo--;
+	get_inode_data(cur, rebuild->ino_rec, NULL);
+	rebuild->ino_rec = next_free_ino_rec(rebuild->ino_rec);
+	return 0;
+}
 
-		/*
-		 * initialize block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum,
-					level, 0, agno);
+/* Rebuild a finobt btree. */
+static void
+build_finobt(
+	struct repair_ctx	*sc,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*btr_fino)
+{
+	struct xfs_btree_cur	*cur;
+	int			error;
 
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
+	btr_fino->bload.get_data = get_finobt_data;
+	btr_fino->bload.alloc_block = rebuild_alloc_block;
+	btr_fino->ino_rec = findfirst_free_inode_rec(agno);
 
-		/*
-		 * propagate extent record for first extent in new block up
-		 */
-		prop_ino_cursor(mp, agno, btree_curs, btnum, startino, level);
-	}
-	/*
-	 * add inode info to current block
-	 */
-	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
+	if (error)
+		do_error(
+_("Insufficient memory to construct finobt rebuild transaction.\n"));
 
-	bt_key = XFS_INOBT_KEY_ADDR(mp, bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs));
-	bt_ptr = XFS_INOBT_PTR_ADDR(mp, bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs),
-				    M_IGEO(mp)->inobt_mxr[1]);
+	/* Add all observed finobt records. */
+	cur = libxfs_inobt_stage_cursor(sc->mp, sc->tp,
+			&btr_fino->newbt.afake, agno, XFS_BTNUM_FINO);
+	error = -libxfs_btree_bload(cur, &btr_fino->bload, btr_fino);
+	if (error)
+		do_error(
+_("Error %d while creating finobt btree for AG %u.\n"), error, agno);
 
-	bt_key->ir_startino = cpu_to_be32(startino);
-	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
+	/* Since we're not writing the AGI yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(cur, 0);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing finobt btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
 }
 
 /*
  * XXX: yet more code that can be shared with mkfs, growfs.
  */
 static void
-build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
-		bt_status_t *finobt_curs, struct agi_stat *agi_stat)
+build_agi(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	struct bt_rebuild	*ino_bt,
+	struct bt_rebuild	*fino_bt,
+	struct agi_stat		*agi_stat)
 {
-	xfs_buf_t	*agi_buf;
-	xfs_agi_t	*agi;
-	int		i;
+	struct xfs_buf		*agi_buf;
+	struct xfs_agi		*agi;
+	int			i;
 
 	agi_buf = libxfs_getbuf(mp->m_dev,
 			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
@@ -998,8 +1048,8 @@ build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
 		agi->agi_length = cpu_to_be32(mp->m_sb.sb_dblocks -
 			(xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno);
 	agi->agi_count = cpu_to_be32(agi_stat->count);
-	agi->agi_root = cpu_to_be32(btree_curs->root);
-	agi->agi_level = cpu_to_be32(btree_curs->num_levels);
+	agi->agi_root = cpu_to_be32(ino_bt->newbt.afake.af_root);
+	agi->agi_level = cpu_to_be32(ino_bt->newbt.afake.af_levels);
 	agi->agi_freecount = cpu_to_be32(agi_stat->freecount);
 	agi->agi_newino = cpu_to_be32(agi_stat->first_agino);
 	agi->agi_dirino = cpu_to_be32(NULLAGINO);
@@ -1011,192 +1061,13 @@ build_agi(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
 		platform_uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
 
 	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
-		agi->agi_free_root = cpu_to_be32(finobt_curs->root);
-		agi->agi_free_level = cpu_to_be32(finobt_curs->num_levels);
+		agi->agi_free_root = cpu_to_be32(fino_bt->newbt.afake.af_root);
+		agi->agi_free_level = cpu_to_be32(fino_bt->newbt.afake.af_levels);
 	}
 
 	libxfs_writebuf(agi_buf, 0);
 }
 
-/*
- * rebuilds an inode tree given a cursor.  We're lazy here and call
- * the routine that builds the agi
- */
-static void
-build_ino_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
-		bt_status_t *btree_curs, xfs_btnum_t btnum,
-		struct agi_stat *agi_stat)
-{
-	xfs_agnumber_t		i;
-	xfs_agblock_t		j;
-	xfs_agblock_t		agbno;
-	xfs_agino_t		first_agino;
-	struct xfs_btree_block	*bt_hdr;
-	xfs_inobt_rec_t		*bt_rec;
-	ino_tree_node_t		*ino_rec;
-	bt_stat_level_t		*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(btnum);
-	xfs_agino_t		count = 0;
-	xfs_agino_t		freecount = 0;
-	int			inocnt;
-	uint8_t			finocnt;
-	int			k;
-	int			level = btree_curs->num_levels;
-	int			spmask;
-	uint64_t		sparse;
-	uint16_t		holemask;
-
-	ASSERT(btnum == XFS_BTNUM_INO || btnum == XFS_BTNUM_FINO);
-
-	for (i = 0; i < level; i++)  {
-		lptr = &btree_curs->level[i];
-
-		agbno = get_next_blockaddr(agno, i, btree_curs);
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-
-		if (i == btree_curs->num_levels - 1)
-			btree_curs->root = agbno;
-
-		lptr->agbno = agbno;
-		lptr->prev_agbno = NULLAGBLOCK;
-		lptr->prev_buf_p = NULL;
-		/*
-		 * initialize block header
-		 */
-
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum, i, 0, agno);
-	}
-
-	/*
-	 * run along leaf, setting up records.  as we have to switch
-	 * blocks, call the prop_ino_cursor routine to set up the new
-	 * pointers for the parent.  that can recurse up to the root
-	 * if required.  set the sibling pointers for leaf level here.
-	 */
-	if (btnum == XFS_BTNUM_FINO)
-		ino_rec = findfirst_free_inode_rec(agno);
-	else
-		ino_rec = findfirst_inode_rec(agno);
-
-	if (ino_rec != NULL)
-		first_agino = ino_rec->ino_startnum;
-	else
-		first_agino = NULLAGINO;
-
-	lptr = &btree_curs->level[0];
-
-	for (i = 0; i < lptr->num_blocks; i++)  {
-		/*
-		 * block initialization, lay in block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, btnum, 0, 0, agno);
-
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-		bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
-							(lptr->modulo > 0));
-
-		if (lptr->modulo > 0)
-			lptr->modulo--;
-
-		if (lptr->num_recs_pb > 0)
-			prop_ino_cursor(mp, agno, btree_curs, btnum,
-					ino_rec->ino_startnum, 0);
-
-		bt_rec = (xfs_inobt_rec_t *)
-			  ((char *)bt_hdr + XFS_INOBT_BLOCK_LEN(mp));
-		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
-			ASSERT(ino_rec != NULL);
-			bt_rec[j].ir_startino =
-					cpu_to_be32(ino_rec->ino_startnum);
-			bt_rec[j].ir_free = cpu_to_be64(ino_rec->ir_free);
-
-			inocnt = finocnt = 0;
-			for (k = 0; k < sizeof(xfs_inofree_t)*NBBY; k++)  {
-				ASSERT(is_inode_confirmed(ino_rec, k));
-
-				if (is_inode_sparse(ino_rec, k))
-					continue;
-				if (is_inode_free(ino_rec, k))
-					finocnt++;
-				inocnt++;
-			}
-
-			/*
-			 * Set the freecount and check whether we need to update
-			 * the sparse format fields. Otherwise, skip to the next
-			 * record.
-			 */
-			inorec_set_freecount(mp, &bt_rec[j], finocnt);
-			if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
-				goto nextrec;
-
-			/*
-			 * Convert the 64-bit in-core sparse inode state to the
-			 * 16-bit on-disk holemask.
-			 */
-			holemask = 0;
-			spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
-			sparse = ino_rec->ir_sparse;
-			for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
-				if (sparse & spmask) {
-					ASSERT((sparse & spmask) == spmask);
-					holemask |= (1 << k);
-				} else
-					ASSERT((sparse & spmask) == 0);
-				sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
-			}
-
-			bt_rec[j].ir_u.sp.ir_count = inocnt;
-			bt_rec[j].ir_u.sp.ir_holemask = cpu_to_be16(holemask);
-
-nextrec:
-			freecount += finocnt;
-			count += inocnt;
-
-			if (btnum == XFS_BTNUM_FINO)
-				ino_rec = next_free_ino_rec(ino_rec);
-			else
-				ino_rec = next_ino_rec(ino_rec);
-		}
-
-		if (ino_rec != NULL)  {
-			/*
-			 * get next leaf level block
-			 */
-			if (lptr->prev_buf_p != NULL)  {
-#ifdef XR_BLD_INO_TRACE
-				fprintf(stderr, "writing inobt agbno %u\n",
-					lptr->prev_agbno);
-#endif
-				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
-				libxfs_writebuf(lptr->prev_buf_p, 0);
-			}
-			lptr->prev_buf_p = lptr->buf_p;
-			lptr->prev_agbno = lptr->agbno;
-			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
-			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
-
-			lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		}
-	}
-
-	if (agi_stat) {
-		agi_stat->first_agino = first_agino;
-		agi_stat->count = count;
-		agi_stat->freecount = freecount;
-	}
-}
-
 /* rebuild the rmap tree */
 
 /*
@@ -2082,14 +1953,10 @@ phase5_func(
 {
 	struct repair_ctx	sc = { .mp = mp, };
 	struct agi_stat		agi_stat = {0,};
-	uint64_t		num_inos;
-	uint64_t		num_free_inos;
-	uint64_t		finobt_num_inos;
-	uint64_t		finobt_num_free_inos;
 	struct bt_rebuild	btr_bno;
 	struct bt_rebuild	btr_cnt;
-	bt_status_t		ino_btree_curs;
-	bt_status_t		fino_btree_curs;
+	struct bt_rebuild	btr_ino;
+	struct bt_rebuild	btr_fino;
 	bt_status_t		rmap_btree_curs;
 	bt_status_t		refcnt_btree_curs;
 	int			extra_blocks = 0;
@@ -2126,21 +1993,8 @@ phase5_func(
 			agno);
 	}
 
-	/*
-	 * ok, now set up the btree cursors for the
-	 * on-disk btrees (includs pre-allocating all
-	 * required blocks for the trees themselves)
-	 */
-	init_ino_cursor(mp, agno, &ino_btree_curs, &num_inos,
-			&num_free_inos, 0);
-
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
-		init_ino_cursor(mp, agno, &fino_btree_curs,
-				&finobt_num_inos, &finobt_num_free_inos,
-				1);
-
-	sb_icount_ag[agno] += num_inos;
-	sb_ifree_ag[agno] += num_free_inos;
+	init_ino_cursors(&sc, agno, num_freeblocks, &sb_icount_ag[agno],
+			&sb_ifree_ag[agno], &btr_ino, &btr_fino);
 
 	/*
 	 * Set up the btree cursors for the on-disk rmap btrees,
@@ -2237,36 +2091,27 @@ phase5_func(
 			&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
 
 	/*
-	 * build inode allocation tree.
+	 * build inode allocation trees.
 	 */
-	build_ino_tree(mp, agno, &ino_btree_curs, XFS_BTNUM_INO,
-			&agi_stat);
-	write_cursor(&ino_btree_curs);
-
-	/*
-	 * build free inode tree
-	 */
-	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
-		build_ino_tree(mp, agno, &fino_btree_curs,
-				XFS_BTNUM_FINO, NULL);
-		write_cursor(&fino_btree_curs);
-	}
+	build_inobt(&sc, agno, &btr_ino, &agi_stat);
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		build_finobt(&sc, agno, &btr_fino);
 
 	/* build the agi */
-	build_agi(mp, agno, &ino_btree_curs, &fino_btree_curs,
-		  &agi_stat);
+	build_agi(mp, agno, &btr_ino, &btr_fino, &agi_stat);
 
 	/*
 	 * tear down cursors
 	 */
 	finish_rebuild(mp, &btr_bno, lost_fsb);
 	finish_rebuild(mp, &btr_cnt, lost_fsb);
+	finish_rebuild(mp, &btr_ino, lost_fsb);
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		finish_rebuild(mp, &btr_fino, lost_fsb);
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 		finish_cursor(&rmap_btree_curs);
 	if (xfs_sb_version_hasreflink(&mp->m_sb))
 		finish_cursor(&refcnt_btree_curs);
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
-		finish_cursor(&fino_btree_curs);
 
 	/*
 	 * release the incore per-AG bno/bcnt trees so


^ permalink raw reply related	[flat|nested] 25+ messages in thread
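
One subtle step in get_inode_data() above is folding the 64-bit in-core
sparse inode mask down to the 16-bit on-disk holemask, where each
holemask bit summarizes four consecutive inodes.  Here is a standalone
sketch of just that fold; the constants are restated locally so it
compiles on its own, and the names are stand-ins for the
XFS_INODES_PER_HOLEMASK_BIT/XFS_INOBT_HOLEMASK_BITS macros.

#include <stdint.h>
#include <stdio.h>

#define INODES_PER_HOLEMASK_BIT	4	/* 64 inodes / 16 holemask bits */
#define HOLEMASK_BITS		16

static uint16_t
sparse_to_holemask(
	uint64_t	sparse)
{
	uint64_t	spmask = (1ULL << INODES_PER_HOLEMASK_BIT) - 1;
	uint16_t	holemask = 0;
	int		k;

	for (k = 0; k < HOLEMASK_BITS; k++) {
		/* A group of four is either fully sparse or fully present. */
		if (sparse & spmask)
			holemask |= (1U << k);
		sparse >>= INODES_PER_HOLEMASK_BIT;
	}
	return holemask;
}

int
main(void)
{
	/* Inodes 0-3 and 60-63 sparse: expect bits 0 and 15 (0x8001). */
	uint64_t	sparse = 0xF00000000000000FULL;

	printf("holemask = 0x%04x\n",
			(unsigned int)sparse_to_holemask(sparse));
	return 0;
}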

* [PATCH 6/9] xfs_repair: rebuild reverse mapping btrees with bulk loader
  2020-01-01  1:21 [PATCH v2 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
                   ` (4 preceding siblings ...)
  2020-01-01  1:21 ` [PATCH 5/9] xfs_repair: rebuild inode " Darrick J. Wong
@ 2020-01-01  1:22 ` Darrick J. Wong
  2020-01-01  1:22 ` [PATCH 7/9] xfs_repair: rebuild refcount " Darrick J. Wong
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 25+ messages in thread
From: Darrick J. Wong @ 2020-01-01  1:22 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use the btree bulk loading functions to rebuild the reverse mapping
btrees and drop the open-coded implementation.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/libxfs_api_defs.h |    1 
 repair/phase5.c          |  410 ++++++++--------------------------------------
 2 files changed, 71 insertions(+), 340 deletions(-)


diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 468503c6..4fc26d15 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -182,5 +182,6 @@
 #define xfs_btree_bload			libxfs_btree_bload
 #define xfs_allocbt_stage_cursor	libxfs_allocbt_stage_cursor
 #define xfs_inobt_stage_cursor		libxfs_inobt_stage_cursor
+#define xfs_rmapbt_stage_cursor		libxfs_rmapbt_stage_cursor
 
 #endif /* __LIBXFS_API_DEFS_H__ */
diff --git a/repair/phase5.c b/repair/phase5.c
index 1285527a..ef120b5e 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -1070,359 +1070,89 @@ build_agi(
 
 /* rebuild the rmap tree */
 
-/*
- * we don't have to worry here about how chewing up free extents
- * may perturb things because rmap tree building happens before
- * freespace tree building.
- */
+/* Set up the rmap rebuild parameters. */
 static void
 init_rmapbt_cursor(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs)
-{
-	size_t			num_recs;
-	int			level;
-	struct bt_stat_level	*lptr;
-	struct bt_stat_level	*p_lptr;
-	xfs_extlen_t		blocks_allocated;
-	int			maxrecs;
-
-	if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) {
-		memset(btree_curs, 0, sizeof(struct bt_status));
-		return;
-	}
-
-	lptr = &btree_curs->level[0];
-	btree_curs->init = 1;
-	btree_curs->owner = XFS_RMAP_OWN_AG;
-
-	/*
-	 * build up statistics
-	 */
-	num_recs = rmap_record_count(mp, agno);
-	if (num_recs == 0) {
-		/*
-		 * easy corner-case -- no rmap records
-		 */
-		lptr->num_blocks = 1;
-		lptr->modulo = 0;
-		lptr->num_recs_pb = 0;
-		lptr->num_recs_tot = 0;
-
-		btree_curs->num_levels = 1;
-		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
-
-		setup_cursor(mp, agno, btree_curs);
-
-		return;
-	}
-
-	/*
-	 * Leave enough slack in the rmapbt that we can insert the
-	 * metadata AG entries without too many splits.
-	 */
-	maxrecs = mp->m_rmap_mxr[0];
-	if (num_recs > maxrecs)
-		maxrecs -= 10;
-	blocks_allocated = lptr->num_blocks = howmany(num_recs, maxrecs);
-
-	lptr->modulo = num_recs % lptr->num_blocks;
-	lptr->num_recs_pb = num_recs / lptr->num_blocks;
-	lptr->num_recs_tot = num_recs;
-	level = 1;
-
-	if (lptr->num_blocks > 1)  {
-		for (; btree_curs->level[level-1].num_blocks > 1
-				&& level < XFS_BTREE_MAXLEVELS;
-				level++)  {
-			lptr = &btree_curs->level[level];
-			p_lptr = &btree_curs->level[level - 1];
-			lptr->num_blocks = howmany(p_lptr->num_blocks,
-				mp->m_rmap_mxr[1]);
-			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
-			lptr->num_recs_pb = p_lptr->num_blocks
-					/ lptr->num_blocks;
-			lptr->num_recs_tot = p_lptr->num_blocks;
-
-			blocks_allocated += lptr->num_blocks;
-		}
-	}
-	ASSERT(lptr->num_blocks == 1);
-	btree_curs->num_levels = level;
-
-	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
-			= blocks_allocated;
-
-	setup_cursor(mp, agno, btree_curs);
-}
-
-static void
-prop_rmap_cursor(
-	struct xfs_mount	*mp,
+	struct repair_ctx	*sc,
 	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs,
-	struct xfs_rmap_irec	*rm_rec,
-	int			level)
+	unsigned int		free_space,
+	struct bt_rebuild	*btr)
 {
-	struct xfs_btree_block	*bt_hdr;
-	struct xfs_rmap_key	*bt_key;
-	xfs_rmap_ptr_t		*bt_ptr;
-	xfs_agblock_t		agbno;
-	struct bt_stat_level	*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(XFS_BTNUM_RMAP);
+	struct xfs_btree_cur	*rmap_cur;
+	int			error;
 
-	level++;
+	init_rebuild(sc, &XFS_RMAP_OINFO_AG, free_space, btr);
 
-	if (level >= btree_curs->num_levels)
+	if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb))
 		return;
 
-	lptr = &btree_curs->level[level];
-	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
-		/*
-		 * this only happens once to initialize the
-		 * first path up the left side of the tree
-		 * where the agbno's are already set up
-		 */
-		prop_rmap_cursor(mp, agno, btree_curs, rm_rec, level);
-	}
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
-				lptr->num_recs_pb + (lptr->modulo > 0))  {
-		/*
-		 * write out current prev block, grab us a new block,
-		 * and set the rightsib pointer of current block
-		 */
-#ifdef XR_BLD_INO_TRACE
-		fprintf(stderr, " rmap prop agbno %d ", lptr->prev_agbno);
-#endif
-		if (lptr->prev_agbno != NULLAGBLOCK)  {
-			ASSERT(lptr->prev_buf_p != NULL);
-			libxfs_writebuf(lptr->prev_buf_p, 0);
-		}
-		lptr->prev_agbno = lptr->agbno;
-		lptr->prev_buf_p = lptr->buf_p;
-		agbno = get_next_blockaddr(agno, level, btree_curs);
-
-		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
-
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		lptr->agbno = agbno;
-
-		if (lptr->modulo)
-			lptr->modulo--;
-
-		/*
-		 * initialize block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, XFS_BTNUM_RMAP,
-					level, 0, agno);
-
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-
-		/*
-		 * propagate extent record for first extent in new block up
-		 */
-		prop_rmap_cursor(mp, agno, btree_curs, rm_rec, level);
-	}
-	/*
-	 * add rmap info to current block
-	 */
-	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
-
-	bt_key = XFS_RMAP_KEY_ADDR(bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs));
-	bt_ptr = XFS_RMAP_PTR_ADDR(bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs),
-				    mp->m_rmap_mxr[1]);
-
-	bt_key->rm_startblock = cpu_to_be32(rm_rec->rm_startblock);
-	bt_key->rm_owner = cpu_to_be64(rm_rec->rm_owner);
-	bt_key->rm_offset = cpu_to_be64(rm_rec->rm_offset);
+	/* Compute how many blocks we'll need. */
+	rmap_cur = libxfs_rmapbt_stage_cursor(sc->mp, sc->tp,
+			&btr->newbt.afake, agno);
+	error = -libxfs_btree_bload_compute_geometry(rmap_cur, &btr->bload,
+			rmap_record_count(sc->mp, agno));
+	if (error)
+		do_error(
+_("Unable to compute rmap btree geometry, error %d.\n"), error);
+	libxfs_btree_del_cursor(rmap_cur, error);
 
-	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
+	setup_rebuild(sc->mp, agno, btr, btr->bload.nr_blocks);
 }
 
-static void
-prop_rmap_highkey(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs,
-	struct xfs_rmap_irec	*rm_highkey)
+/* Grab one rmap record. */
+static int
+get_rmap_data(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
 {
-	struct xfs_btree_block	*bt_hdr;
-	struct xfs_rmap_key	*bt_key;
-	struct bt_stat_level	*lptr;
-	struct xfs_rmap_irec	key = {0};
-	struct xfs_rmap_irec	high_key;
-	int			level;
-	int			i;
-	int			numrecs;
+	struct xfs_rmap_irec		*rmap = &cur->bc_rec.r;
+	struct xfs_rmap_irec		*rec;
+	struct bt_rebuild		*btr = priv;
 
-	high_key = *rm_highkey;
-	for (level = 1; level < btree_curs->num_levels; level++) {
-		lptr = &btree_curs->level[level];
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		numrecs = be16_to_cpu(bt_hdr->bb_numrecs);
-		bt_key = XFS_RMAP_HIGH_KEY_ADDR(bt_hdr, numrecs);
-
-		bt_key->rm_startblock = cpu_to_be32(high_key.rm_startblock);
-		bt_key->rm_owner = cpu_to_be64(high_key.rm_owner);
-		bt_key->rm_offset = cpu_to_be64(
-				libxfs_rmap_irec_offset_pack(&high_key));
-
-		for (i = 1; i <= numrecs; i++) {
-			bt_key = XFS_RMAP_HIGH_KEY_ADDR(bt_hdr, i);
-			key.rm_startblock = be32_to_cpu(bt_key->rm_startblock);
-			key.rm_owner = be64_to_cpu(bt_key->rm_owner);
-			key.rm_offset = be64_to_cpu(bt_key->rm_offset);
-			if (rmap_diffkeys(&key, &high_key) > 0)
-				high_key = key;
-		}
-	}
+	rec = pop_slab_cursor(btr->slab_cursor);
+	memcpy(rmap, rec, sizeof(struct xfs_rmap_irec));
+	return 0;
 }
 
-/*
- * rebuilds a rmap btree given a cursor.
- */
+/* Rebuild an rmap btree. */
 static void
 build_rmap_tree(
-	struct xfs_mount	*mp,
+	struct repair_ctx	*sc,
 	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs)
+	struct bt_rebuild	*btr)
 {
-	xfs_agnumber_t		i;
-	xfs_agblock_t		j;
-	xfs_agblock_t		agbno;
-	struct xfs_btree_block	*bt_hdr;
-	struct xfs_rmap_irec	*rm_rec;
-	struct xfs_slab_cursor	*rmap_cur;
-	struct xfs_rmap_rec	*bt_rec;
-	struct xfs_rmap_irec	highest_key = {0};
-	struct xfs_rmap_irec	hi_key = {0};
-	struct bt_stat_level	*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(XFS_BTNUM_RMAP);
-	int			numrecs;
-	int			level = btree_curs->num_levels;
+	struct xfs_btree_cur	*rmap_cur;
 	int			error;
 
-	highest_key.rm_flags = 0;
-	for (i = 0; i < level; i++)  {
-		lptr = &btree_curs->level[i];
-
-		agbno = get_next_blockaddr(agno, i, btree_curs);
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-
-		if (i == btree_curs->num_levels - 1)
-			btree_curs->root = agbno;
-
-		lptr->agbno = agbno;
-		lptr->prev_agbno = NULLAGBLOCK;
-		lptr->prev_buf_p = NULL;
-		/*
-		 * initialize block header
-		 */
+	btr->bload.get_data = get_rmap_data;
+	btr->bload.alloc_block = rebuild_alloc_block;
 
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, XFS_BTNUM_RMAP,
-					i, 0, agno);
-	}
-
-	/*
-	 * run along leaf, setting up records.  as we have to switch
-	 * blocks, call the prop_rmap_cursor routine to set up the new
-	 * pointers for the parent.  that can recurse up to the root
-	 * if required.  set the sibling pointers for leaf level here.
-	 */
-	error = rmap_init_cursor(agno, &rmap_cur);
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
 	if (error)
 		do_error(
-_("Insufficient memory to construct reverse-map cursor."));
-	rm_rec = pop_slab_cursor(rmap_cur);
-	lptr = &btree_curs->level[0];
-
-	for (i = 0; i < lptr->num_blocks; i++)  {
-		numrecs = lptr->num_recs_pb + (lptr->modulo > 0);
-		ASSERT(rm_rec != NULL || numrecs == 0);
-
-		/*
-		 * block initialization, lay in block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, XFS_BTNUM_RMAP,
-					0, 0, agno);
-
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-		bt_hdr->bb_numrecs = cpu_to_be16(numrecs);
-
-		if (lptr->modulo > 0)
-			lptr->modulo--;
-
-		if (lptr->num_recs_pb > 0) {
-			ASSERT(rm_rec != NULL);
-			prop_rmap_cursor(mp, agno, btree_curs, rm_rec, 0);
-		}
+_("Insufficient memory to construct rmap rebuild transaction.\n"));
 
-		bt_rec = (struct xfs_rmap_rec *)
-			  ((char *)bt_hdr + XFS_RMAP_BLOCK_LEN);
-		highest_key.rm_startblock = 0;
-		highest_key.rm_owner = 0;
-		highest_key.rm_offset = 0;
-		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
-			ASSERT(rm_rec != NULL);
-			bt_rec[j].rm_startblock =
-					cpu_to_be32(rm_rec->rm_startblock);
-			bt_rec[j].rm_blockcount =
-					cpu_to_be32(rm_rec->rm_blockcount);
-			bt_rec[j].rm_owner = cpu_to_be64(rm_rec->rm_owner);
-			bt_rec[j].rm_offset = cpu_to_be64(
-					libxfs_rmap_irec_offset_pack(rm_rec));
-			rmap_high_key_from_rec(rm_rec, &hi_key);
-			if (rmap_diffkeys(&hi_key, &highest_key) > 0)
-				highest_key = hi_key;
-
-			rm_rec = pop_slab_cursor(rmap_cur);
-		}
-
-		/* Now go set the parent key */
-		prop_rmap_highkey(mp, agno, btree_curs, &highest_key);
+	error = rmap_init_cursor(agno, &btr->slab_cursor);
+	if (error)
+		do_error(
+_("Insufficient memory to construct rmap cursor.\n"));
 
-		if (rm_rec != NULL)  {
-			/*
-			 * get next leaf level block
-			 */
-			if (lptr->prev_buf_p != NULL)  {
-#ifdef XR_BLD_RL_TRACE
-				fprintf(stderr, "writing rmapbt agbno %u\n",
-					lptr->prev_agbno);
-#endif
-				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
-				libxfs_writebuf(lptr->prev_buf_p, 0);
-			}
-			lptr->prev_buf_p = lptr->buf_p;
-			lptr->prev_agbno = lptr->agbno;
-			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
-			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
+	/* Add all observed rmap records. */
+	rmap_cur = libxfs_rmapbt_stage_cursor(sc->mp, sc->tp,
+			&btr->newbt.afake, agno);
+	error = -libxfs_btree_bload(rmap_cur, &btr->bload, btr);
+	if (error)
+		do_error(
+_("Error %d while creating rmap btree for AG %u.\n"), error, agno);
 
-			lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		}
-	}
-	free_slab_cursor(&rmap_cur);
+	/* Since we're not writing the AGF yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(rmap_cur, 0);
+	free_slab_cursor(&btr->slab_cursor);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing rmap btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
 }
 
 /* rebuild the refcount tree */
@@ -1753,7 +1483,7 @@ build_agf_agfl(
 	struct bt_rebuild	*btr_cnt,
 	xfs_extlen_t		freeblks,	/* # free blocks in tree */
 	int			lostblocks,	/* # blocks that will be lost */
-	struct bt_status	*rmap_bt,
+	struct bt_rebuild	*btr_rmap,
 	struct bt_status	*refcnt_bt,
 	struct xfs_slab		*lost_fsb)
 {
@@ -1797,11 +1527,12 @@ build_agf_agfl(
 			cpu_to_be32(btr_cnt->newbt.afake.af_root);
 	agf->agf_levels[XFS_BTNUM_CNT] =
 			cpu_to_be32(btr_cnt->newbt.afake.af_levels);
-	agf->agf_roots[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->root);
-	agf->agf_levels[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->num_levels);
+	agf->agf_roots[XFS_BTNUM_RMAP] =
+			cpu_to_be32(btr_rmap->newbt.afake.af_root);
+	agf->agf_levels[XFS_BTNUM_RMAP] =
+			cpu_to_be32(btr_rmap->newbt.afake.af_levels);
 	agf->agf_freeblks = cpu_to_be32(freeblks);
-	agf->agf_rmap_blocks = cpu_to_be32(rmap_bt->num_tot_blocks -
-			rmap_bt->num_free_blocks);
+	agf->agf_rmap_blocks = cpu_to_be32(btr_rmap->newbt.afake.af_blocks);
 	agf->agf_refcount_root = cpu_to_be32(refcnt_bt->root);
 	agf->agf_refcount_level = cpu_to_be32(refcnt_bt->num_levels);
 	agf->agf_refcount_blocks = cpu_to_be32(refcnt_bt->num_tot_blocks -
@@ -1819,7 +1550,7 @@ build_agf_agfl(
 		blks = btr_bno->newbt.afake.af_blocks +
 			btr_cnt->newbt.afake.af_blocks - 2;
 		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-			blks += rmap_bt->num_tot_blocks - rmap_bt->num_free_blocks - 1;
+			blks += btr_rmap->newbt.afake.af_blocks - 1;
 		agf->agf_btreeblks = cpu_to_be32(blks);
 #ifdef XR_BLD_FREE_TRACE
 		fprintf(stderr, "agf->agf_btreeblks = %u\n",
@@ -1860,6 +1591,7 @@ build_agf_agfl(
 	/* Fill the AGFL with leftover blocks or save them for later. */
 	fill_agfl(btr_bno, freelist, &i);
 	fill_agfl(btr_cnt, freelist, &i);
+	fill_agfl(btr_rmap, freelist, &i);
 
 	/* Set the AGF counters for the AGFL. */
 	if (i > 0) {
@@ -1957,7 +1689,7 @@ phase5_func(
 	struct bt_rebuild	btr_cnt;
 	struct bt_rebuild	btr_ino;
 	struct bt_rebuild	btr_fino;
-	bt_status_t		rmap_btree_curs;
+	struct bt_rebuild	btr_rmap;
 	bt_status_t		refcnt_btree_curs;
 	int			extra_blocks = 0;
 	uint			num_freeblocks;
@@ -2000,7 +1732,7 @@ phase5_func(
 	 * Set up the btree cursors for the on-disk rmap btrees,
 	 * which includes pre-allocating all required blocks.
 	 */
-	init_rmapbt_cursor(mp, agno, &rmap_btree_curs);
+	init_rmapbt_cursor(&sc, agno, num_freeblocks, &btr_rmap);
 
 	/*
 	 * Set up the btree cursors for the on-disk refcount btrees,
@@ -2073,10 +1805,8 @@ phase5_func(
 	ASSERT(freeblks1 == freeblks2);
 
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
-		build_rmap_tree(mp, agno, &rmap_btree_curs);
-		write_cursor(&rmap_btree_curs);
-		sb_fdblocks_ag[agno] += (rmap_btree_curs.num_tot_blocks -
-				rmap_btree_curs.num_free_blocks) - 1;
+		build_rmap_tree(&sc, agno, &btr_rmap);
+		sb_fdblocks_ag[agno] += btr_rmap.newbt.afake.af_blocks - 1;
 	}
 
 	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
@@ -2088,7 +1818,7 @@ phase5_func(
 	 * set up agf and agfl
 	 */
 	build_agf_agfl(mp, agno, &btr_bno, &btr_cnt, freeblks1, extra_blocks,
-			&rmap_btree_curs, &refcnt_btree_curs, lost_fsb);
+			&btr_rmap, &refcnt_btree_curs, lost_fsb);
 
 	/*
 	 * build inode allocation trees.
@@ -2109,7 +1839,7 @@ phase5_func(
 	if (xfs_sb_version_hasfinobt(&mp->m_sb))
 		finish_rebuild(mp, &btr_fino, lost_fsb);
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-		finish_cursor(&rmap_btree_curs);
+		finish_rebuild(mp, &btr_rmap, lost_fsb);
 	if (xfs_sb_version_hasreflink(&mp->m_sb))
 		finish_cursor(&refcnt_btree_curs);
 


^ permalink raw reply related	[flat|nested] 25+ messages in thread
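
Unlike the bnobt case, get_rmap_data() above feeds the loader from a
slab cursor: the records were collected into a flat array earlier and
are simply consumed in order.  A standalone sketch of that pop-style
cursor follows; the slab_cursor type and pop helper here are
hypothetical stand-ins for the repair slab code, not its real API.

#include <stdio.h>
#include <stddef.h>

struct rmap_rec {
	unsigned int	startblock;
	unsigned int	blockcount;
};

struct slab_cursor {
	struct rmap_rec	*recs;	/* staged records, already sorted */
	size_t		nr;
	size_t		next;
};

/* Mirrors pop_slab_cursor(): return the next record or NULL. */
static struct rmap_rec *
pop_slab_cursor(
	struct slab_cursor	*cur)
{
	if (cur->next >= cur->nr)
		return NULL;
	return &cur->recs[cur->next++];
}

int
main(void)
{
	struct rmap_rec		recs[] = { { 8, 2 }, { 24, 16 }, { 100, 1 } };
	struct slab_cursor	cur = { recs, 3, 0 };
	struct rmap_rec		*rec;

	/* The loader would call this once per record it places. */
	while ((rec = pop_slab_cursor(&cur)) != NULL)
		printf("rmap [%u, %u]\n", rec->startblock, rec->blockcount);
	return 0;
}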

* [PATCH 7/9] xfs_repair: rebuild refcount btrees with bulk loader
  2020-01-01  1:21 [PATCH v2 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
                   ` (5 preceding siblings ...)
  2020-01-01  1:22 ` [PATCH 6/9] xfs_repair: rebuild reverse mapping " Darrick J. Wong
@ 2020-01-01  1:22 ` Darrick J. Wong
  2020-01-01  1:22 ` [PATCH 8/9] xfs_repair: remove old btree rebuild support code Darrick J. Wong
  2020-01-01  1:22 ` [PATCH 9/9] xfs_repair: track blocks lost during btree construction via extents Darrick J. Wong
  8 siblings, 0 replies; 25+ messages in thread
From: Darrick J. Wong @ 2020-01-01  1:22 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use the btree bulk loading functions to rebuild the refcount btrees
and drop the open-coded implementation.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/libxfs_api_defs.h |    1 
 repair/phase5.c          |  347 +++++++++-------------------------------------
 2 files changed, 72 insertions(+), 276 deletions(-)


diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 4fc26d15..72605d4d 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -183,5 +183,6 @@
 #define xfs_allocbt_stage_cursor	libxfs_allocbt_stage_cursor
 #define xfs_inobt_stage_cursor		libxfs_inobt_stage_cursor
 #define xfs_rmapbt_stage_cursor		libxfs_rmapbt_stage_cursor
+#define xfs_refcountbt_stage_cursor	libxfs_refcountbt_stage_cursor
 
 #endif /* __LIBXFS_API_DEFS_H__ */
diff --git a/repair/phase5.c b/repair/phase5.c
index ef120b5e..ee4a4563 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -1157,295 +1157,89 @@ _("Error %d while writing rmap btree for AG %u.\n"), error, agno);
 
 /* rebuild the refcount tree */
 
-/*
- * we don't have to worry here about how chewing up free extents
- * may perturb things because reflink tree building happens before
- * freespace tree building.
- */
+/* Set up the refcount rebuild parameters. */
 static void
 init_refc_cursor(
-	struct xfs_mount	*mp,
+	struct repair_ctx	*sc,
 	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs)
+	unsigned int		free_space,
+	struct bt_rebuild	*btr)
 {
-	size_t			num_recs;
-	int			level;
-	struct bt_stat_level	*lptr;
-	struct bt_stat_level	*p_lptr;
-	xfs_extlen_t		blocks_allocated;
-
-	if (!xfs_sb_version_hasreflink(&mp->m_sb)) {
-		memset(btree_curs, 0, sizeof(struct bt_status));
-		return;
-	}
-
-	lptr = &btree_curs->level[0];
-	btree_curs->init = 1;
-	btree_curs->owner = XFS_RMAP_OWN_REFC;
-
-	/*
-	 * build up statistics
-	 */
-	num_recs = refcount_record_count(mp, agno);
-	if (num_recs == 0) {
-		/*
-		 * easy corner-case -- no refcount records
-		 */
-		lptr->num_blocks = 1;
-		lptr->modulo = 0;
-		lptr->num_recs_pb = 0;
-		lptr->num_recs_tot = 0;
-
-		btree_curs->num_levels = 1;
-		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
+	struct xfs_btree_cur	*refc_cur;
+	int			error;
 
-		setup_cursor(mp, agno, btree_curs);
+	init_rebuild(sc, &XFS_RMAP_OINFO_REFC, free_space, btr);
 
+	if (!xfs_sb_version_hasreflink(&sc->mp->m_sb))
 		return;
-	}
 
-	blocks_allocated = lptr->num_blocks = howmany(num_recs,
-					mp->m_refc_mxr[0]);
-
-	lptr->modulo = num_recs % lptr->num_blocks;
-	lptr->num_recs_pb = num_recs / lptr->num_blocks;
-	lptr->num_recs_tot = num_recs;
-	level = 1;
-
-	if (lptr->num_blocks > 1)  {
-		for (; btree_curs->level[level-1].num_blocks > 1
-				&& level < XFS_BTREE_MAXLEVELS;
-				level++)  {
-			lptr = &btree_curs->level[level];
-			p_lptr = &btree_curs->level[level - 1];
-			lptr->num_blocks = howmany(p_lptr->num_blocks,
-					mp->m_refc_mxr[1]);
-			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
-			lptr->num_recs_pb = p_lptr->num_blocks
-					/ lptr->num_blocks;
-			lptr->num_recs_tot = p_lptr->num_blocks;
-
-			blocks_allocated += lptr->num_blocks;
-		}
-	}
-	ASSERT(lptr->num_blocks == 1);
-	btree_curs->num_levels = level;
-
-	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
-			= blocks_allocated;
+	/* Compute how many blocks we'll need. */
+	refc_cur = libxfs_refcountbt_stage_cursor(sc->mp, sc->tp,
+			&btr->newbt.afake, agno);
+	error = -libxfs_btree_bload_compute_geometry(refc_cur, &btr->bload,
+			refcount_record_count(sc->mp, agno));
+	if (error)
+		do_error(
+_("Unable to compute refcount btree geometry, error %d.\n"), error);
+	libxfs_btree_del_cursor(refc_cur, error);
 
-	setup_cursor(mp, agno, btree_curs);
+	setup_rebuild(sc->mp, agno, btr, btr->bload.nr_blocks);
 }
 
-static void
-prop_refc_cursor(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs,
-	xfs_agblock_t		startbno,
-	int			level)
+/* Grab one refcount record. */
+static int
+get_refcount_data(
+	struct xfs_btree_cur		*cur,
+	void				*priv)
 {
-	struct xfs_btree_block	*bt_hdr;
-	struct xfs_refcount_key	*bt_key;
-	xfs_refcount_ptr_t	*bt_ptr;
-	xfs_agblock_t		agbno;
-	struct bt_stat_level	*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(XFS_BTNUM_REFC);
-
-	level++;
-
-	if (level >= btree_curs->num_levels)
-		return;
-
-	lptr = &btree_curs->level[level];
-	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
-		/*
-		 * this only happens once to initialize the
-		 * first path up the left side of the tree
-		 * where the agbno's are already set up
-		 */
-		prop_refc_cursor(mp, agno, btree_curs, startbno, level);
-	}
-
-	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
-				lptr->num_recs_pb + (lptr->modulo > 0))  {
-		/*
-		 * write out current prev block, grab us a new block,
-		 * and set the rightsib pointer of current block
-		 */
-#ifdef XR_BLD_INO_TRACE
-		fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
-#endif
-		if (lptr->prev_agbno != NULLAGBLOCK)  {
-			ASSERT(lptr->prev_buf_p != NULL);
-			libxfs_writebuf(lptr->prev_buf_p, 0);
-		}
-		lptr->prev_agbno = lptr->agbno;
-		lptr->prev_buf_p = lptr->buf_p;
-		agbno = get_next_blockaddr(agno, level, btree_curs);
-
-		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
-
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		lptr->agbno = agbno;
-
-		if (lptr->modulo)
-			lptr->modulo--;
-
-		/*
-		 * initialize block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, XFS_BTNUM_REFC,
-					level, 0, agno);
-
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-
-		/*
-		 * propagate extent record for first extent in new block up
-		 */
-		prop_refc_cursor(mp, agno, btree_curs, startbno, level);
-	}
-	/*
-	 * add inode info to current block
-	 */
-	be16_add_cpu(&bt_hdr->bb_numrecs, 1);
-
-	bt_key = XFS_REFCOUNT_KEY_ADDR(bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs));
-	bt_ptr = XFS_REFCOUNT_PTR_ADDR(bt_hdr,
-				    be16_to_cpu(bt_hdr->bb_numrecs),
-				    mp->m_refc_mxr[1]);
+	struct xfs_refcount_irec	*refc = &cur->bc_rec.rc;
+	struct xfs_refcount_irec	*rec;
+	struct bt_rebuild		*btr = priv;
 
-	bt_key->rc_startblock = cpu_to_be32(startbno);
-	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
+	rec = pop_slab_cursor(btr->slab_cursor);
+	memcpy(refc, rec, sizeof(struct xfs_refcount_irec));
+	return 0;
 }
 
-/*
- * rebuilds a refcount btree given a cursor.
- */
+/* Rebuild a refcount btree. */
 static void
 build_refcount_tree(
-	struct xfs_mount	*mp,
+	struct repair_ctx	*sc,
 	xfs_agnumber_t		agno,
-	struct bt_status	*btree_curs)
+	struct bt_rebuild	*btr)
 {
-	xfs_agnumber_t		i;
-	xfs_agblock_t		j;
-	xfs_agblock_t		agbno;
-	struct xfs_btree_block	*bt_hdr;
-	struct xfs_refcount_irec	*refc_rec;
-	struct xfs_slab_cursor	*refc_cur;
-	struct xfs_refcount_rec	*bt_rec;
-	struct bt_stat_level	*lptr;
-	const struct xfs_buf_ops *ops = btnum_to_ops(XFS_BTNUM_REFC);
-	int			numrecs;
-	int			level = btree_curs->num_levels;
+	struct xfs_btree_cur	*refc_cur;
 	int			error;
 
-	for (i = 0; i < level; i++)  {
-		lptr = &btree_curs->level[i];
-
-		agbno = get_next_blockaddr(agno, i, btree_curs);
-		lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, agbno),
-					XFS_FSB_TO_BB(mp, 1));
-
-		if (i == btree_curs->num_levels - 1)
-			btree_curs->root = agbno;
-
-		lptr->agbno = agbno;
-		lptr->prev_agbno = NULLAGBLOCK;
-		lptr->prev_buf_p = NULL;
-		/*
-		 * initialize block header
-		 */
-
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, XFS_BTNUM_REFC,
-					i, 0, agno);
-	}
+	btr->bload.get_data = get_refcount_data;
+	btr->bload.alloc_block = rebuild_alloc_block;
 
-	/*
-	 * run along leaf, setting up records.  as we have to switch
-	 * blocks, call the prop_refc_cursor routine to set up the new
-	 * pointers for the parent.  that can recurse up to the root
-	 * if required.  set the sibling pointers for leaf level here.
-	 */
-	error = init_refcount_cursor(agno, &refc_cur);
+	error = -libxfs_trans_alloc_empty(sc->mp, &sc->tp);
 	if (error)
 		do_error(
-_("Insufficient memory to construct refcount cursor."));
-	refc_rec = pop_slab_cursor(refc_cur);
-	lptr = &btree_curs->level[0];
+_("Insufficient memory to construct refcount rebuild transaction.\n"));
 
-	for (i = 0; i < lptr->num_blocks; i++)  {
-		numrecs = lptr->num_recs_pb + (lptr->modulo > 0);
-		ASSERT(refc_rec != NULL || numrecs == 0);
+	error = init_refcount_cursor(agno, &btr->slab_cursor);
+	if (error)
+		do_error(
+_("Insufficient memory to construct refcount cursor.\n"));
 
-		/*
-		 * block initialization, lay in block header
-		 */
-		lptr->buf_p->b_ops = ops;
-		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
-		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
-		libxfs_btree_init_block(mp, lptr->buf_p, XFS_BTNUM_REFC,
-					0, 0, agno);
-
-		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
-		bt_hdr->bb_numrecs = cpu_to_be16(numrecs);
-
-		if (lptr->modulo > 0)
-			lptr->modulo--;
-
-		if (lptr->num_recs_pb > 0)
-			prop_refc_cursor(mp, agno, btree_curs,
-					refc_rec->rc_startblock, 0);
-
-		bt_rec = (struct xfs_refcount_rec *)
-			  ((char *)bt_hdr + XFS_REFCOUNT_BLOCK_LEN);
-		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
-			ASSERT(refc_rec != NULL);
-			bt_rec[j].rc_startblock =
-					cpu_to_be32(refc_rec->rc_startblock);
-			bt_rec[j].rc_blockcount =
-					cpu_to_be32(refc_rec->rc_blockcount);
-			bt_rec[j].rc_refcount = cpu_to_be32(refc_rec->rc_refcount);
-
-			refc_rec = pop_slab_cursor(refc_cur);
-		}
+	/* Add all observed refcount records. */
+	refc_cur = libxfs_refcountbt_stage_cursor(sc->mp, sc->tp,
+			&btr->newbt.afake, agno);
+	error = -libxfs_btree_bload(refc_cur, &btr->bload, btr);
+	if (error)
+		do_error(
+_("Error %d while creating refcount btree for AG %u.\n"), error, agno);
 
-		if (refc_rec != NULL)  {
-			/*
-			 * get next leaf level block
-			 */
-			if (lptr->prev_buf_p != NULL)  {
-#ifdef XR_BLD_RL_TRACE
-				fprintf(stderr, "writing refcntbt agbno %u\n",
-					lptr->prev_agbno);
-#endif
-				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
-				libxfs_writebuf(lptr->prev_buf_p, 0);
-			}
-			lptr->prev_buf_p = lptr->buf_p;
-			lptr->prev_agbno = lptr->agbno;
-			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
-			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
-
-			lptr->buf_p = libxfs_getbuf(mp->m_dev,
-					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
-					XFS_FSB_TO_BB(mp, 1));
-		}
-	}
-	free_slab_cursor(&refc_cur);
+	/* Since we're not writing the AGF yet, no need to commit the cursor */
+	libxfs_btree_del_cursor(refc_cur, 0);
+	free_slab_cursor(&btr->slab_cursor);
+	error = -libxfs_trans_commit(sc->tp);
+	if (error)
+		do_error(
+_("Error %d while writing refcount btree for AG %u.\n"), error, agno);
+	sc->tp = NULL;
 }
 
 /* Fill the AGFL with any leftover bnobt rebuilder blocks. */
@@ -1484,7 +1278,7 @@ build_agf_agfl(
 	xfs_extlen_t		freeblks,	/* # free blocks in tree */
 	int			lostblocks,	/* # blocks that will be lost */
 	struct bt_rebuild	*btr_rmap,
-	struct bt_status	*refcnt_bt,
+	struct bt_rebuild	*btr_refcount,
 	struct xfs_slab		*lost_fsb)
 {
 	struct extent_tree_node	*ext_ptr;
@@ -1532,11 +1326,14 @@ build_agf_agfl(
 	agf->agf_levels[XFS_BTNUM_RMAP] =
 			cpu_to_be32(btr_rmap->newbt.afake.af_levels);
 	agf->agf_freeblks = cpu_to_be32(freeblks);
-	agf->agf_rmap_blocks = cpu_to_be32(btr_rmap->newbt.afake.af_blocks);
-	agf->agf_refcount_root = cpu_to_be32(refcnt_bt->root);
-	agf->agf_refcount_level = cpu_to_be32(refcnt_bt->num_levels);
-	agf->agf_refcount_blocks = cpu_to_be32(refcnt_bt->num_tot_blocks -
-			refcnt_bt->num_free_blocks);
+	agf->agf_rmap_blocks =
+			cpu_to_be32(btr_rmap->newbt.afake.af_blocks);
+	agf->agf_refcount_root =
+			cpu_to_be32(btr_refcount->newbt.afake.af_root);
+	agf->agf_refcount_level =
+			cpu_to_be32(btr_refcount->newbt.afake.af_levels);
+	agf->agf_refcount_blocks =
+			cpu_to_be32(btr_refcount->newbt.afake.af_blocks);
 
 	/*
 	 * Count and record the number of btree blocks consumed if required.
@@ -1690,7 +1487,7 @@ phase5_func(
 	struct bt_rebuild	btr_ino;
 	struct bt_rebuild	btr_fino;
 	struct bt_rebuild	btr_rmap;
-	bt_status_t		refcnt_btree_curs;
+	struct bt_rebuild	btr_refcount;
 	int			extra_blocks = 0;
 	uint			num_freeblocks;
 	xfs_extlen_t		freeblks1;
@@ -1738,7 +1535,7 @@ phase5_func(
 	 * Set up the btree cursors for the on-disk refcount btrees,
 	 * which includes pre-allocating all required blocks.
 	 */
-	init_refc_cursor(mp, agno, &refcnt_btree_curs);
+	init_refc_cursor(&sc, agno, num_freeblocks, &btr_refcount);
 
 	num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
 	/*
@@ -1809,16 +1606,14 @@ phase5_func(
 		sb_fdblocks_ag[agno] += btr_rmap.newbt.afake.af_blocks - 1;
 	}
 
-	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
-		build_refcount_tree(mp, agno, &refcnt_btree_curs);
-		write_cursor(&refcnt_btree_curs);
-	}
+	if (xfs_sb_version_hasreflink(&mp->m_sb))
+		build_refcount_tree(&sc, agno, &btr_refcount);
 
 	/*
 	 * set up agf and agfl
 	 */
 	build_agf_agfl(mp, agno, &btr_bno, &btr_cnt, freeblks1, extra_blocks,
-			&btr_rmap, &refcnt_btree_curs, lost_fsb);
+			&btr_rmap, &btr_refcount, lost_fsb);
 
 	/*
 	 * build inode allocation trees.
@@ -1841,7 +1636,7 @@ phase5_func(
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 		finish_rebuild(mp, &btr_rmap, lost_fsb);
 	if (xfs_sb_version_hasreflink(&mp->m_sb))
-		finish_cursor(&refcnt_btree_curs);
+		finish_rebuild(mp, &btr_refcount, lost_fsb);
 
 	/*
 	 * release the incore per-AG bno/bcnt trees so

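The conversion above reduces to a short pattern (a condensed sketch; the
separate staged cursors used in init_refc_cursor() and
build_refcount_tree() are folded together here, and error handling is
trimmed):

	/* Stage a cursor against a fake root; the real AGF is untouched. */
	refc_cur = libxfs_refcountbt_stage_cursor(sc->mp, sc->tp,
			&btr->newbt.afake, agno);

	/* Size the new btree from the number of records to load. */
	error = -libxfs_btree_bload_compute_geometry(refc_cur, &btr->bload,
			refcount_record_count(sc->mp, agno));

	/* Bulk load, pulling records and blocks through the callbacks. */
	btr->bload.get_data = get_refcount_data;
	btr->bload.alloc_block = rebuild_alloc_block;
	error = -libxfs_btree_bload(refc_cur, &btr->bload, btr);

	/* The AGF isn't written yet, so tear down without committing. */
	libxfs_btree_del_cursor(refc_cur, 0);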

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 8/9] xfs_repair: remove old btree rebuild support code
  2020-01-01  1:21 [PATCH v2 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
                   ` (6 preceding siblings ...)
  2020-01-01  1:22 ` [PATCH 7/9] xfs_repair: rebuild refcount " Darrick J. Wong
@ 2020-01-01  1:22 ` Darrick J. Wong
  2020-01-01  1:22 ` [PATCH 9/9] xfs_repair: track blocks lost during btree construction via extents Darrick J. Wong
  8 siblings, 0 replies; 25+ messages in thread
From: Darrick J. Wong @ 2020-01-01  1:22 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

This code isn't needed anymore, so get rid of it.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |  240 -------------------------------------------------------
 1 file changed, 240 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index ee4a4563..94fc17d8 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -20,52 +20,6 @@
 #include "rmap.h"
 #include "bload.h"
 
-/*
- * we maintain the current slice (path from root to leaf)
- * of the btree incore.  when we need a new block, we ask
- * the block allocator for the address of a block on that
- * level, map the block in, and set up the appropriate
- * pointers (child, silbing, etc.) and keys that should
- * point to the new block.
- */
-typedef struct bt_stat_level  {
-	/*
-	 * set in setup_cursor routine and maintained in the tree-building
-	 * routines
-	 */
-	xfs_buf_t		*buf_p;		/* 2 buffer pointers to ... */
-	xfs_buf_t		*prev_buf_p;
-	xfs_agblock_t		agbno;		/* current block being filled */
-	xfs_agblock_t		prev_agbno;	/* previous block */
-	/*
-	 * set in calculate/init cursor routines for each btree level
-	 */
-	int			num_recs_tot;	/* # tree recs in level */
-	int			num_blocks;	/* # tree blocks in level */
-	int			num_recs_pb;	/* num_recs_tot / num_blocks */
-	int			modulo;		/* num_recs_tot % num_blocks */
-} bt_stat_level_t;
-
-typedef struct bt_status  {
-	int			init;		/* cursor set up once? */
-	int			num_levels;	/* # of levels in btree */
-	xfs_extlen_t		num_tot_blocks;	/* # blocks alloc'ed for tree */
-	xfs_extlen_t		num_free_blocks;/* # blocks currently unused */
-
-	xfs_agblock_t		root;		/* root block */
-	/*
-	 * list of blocks to be used to set up this tree
-	 * and pointer to the first unused block on the list
-	 */
-	xfs_agblock_t		*btree_blocks;		/* block list */
-	xfs_agblock_t		*free_btree_blocks;	/* first unused block */
-	/*
-	 * per-level status info
-	 */
-	bt_stat_level_t		level[XFS_BTREE_MAXLEVELS];
-	uint64_t		owner;		/* owner */
-} bt_status_t;
-
 struct bt_rebuild {
 	struct xrep_newbt	newbt;
 	struct xfs_btree_bload	bload;
@@ -188,148 +142,6 @@ mk_incore_fstree(
 	return(num_extents);
 }
 
-static xfs_agblock_t
-get_next_blockaddr(xfs_agnumber_t agno, int level, bt_status_t *curs)
-{
-	ASSERT(curs->free_btree_blocks < curs->btree_blocks +
-						curs->num_tot_blocks);
-	ASSERT(curs->num_free_blocks > 0);
-
-	curs->num_free_blocks--;
-	return(*curs->free_btree_blocks++);
-}
-
-/*
- * set up the dynamically allocated block allocation data in the btree
- * cursor that depends on the info in the static portion of the cursor.
- * allocates space from the incore bno/bcnt extent trees and sets up
- * the first path up the left side of the tree.  Also sets up the
- * cursor pointer to the btree root.   called by init_freespace_cursor()
- * and init_ino_cursor()
- */
-static void
-setup_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *curs)
-{
-	int			j;
-	unsigned int		u;
-	xfs_extlen_t		big_extent_len;
-	xfs_agblock_t		big_extent_start;
-	extent_tree_node_t	*ext_ptr;
-	extent_tree_node_t	*bno_ext_ptr;
-	xfs_extlen_t		blocks_allocated;
-	xfs_agblock_t		*agb_ptr;
-	int			error;
-
-	/*
-	 * get the number of blocks we need to allocate, then
-	 * set up block number array, set the free block pointer
-	 * to the first block in the array, and null the array
-	 */
-	big_extent_len = curs->num_tot_blocks;
-	blocks_allocated = 0;
-
-	ASSERT(big_extent_len > 0);
-
-	if ((curs->btree_blocks = malloc(sizeof(xfs_agblock_t)
-					* big_extent_len)) == NULL)
-		do_error(_("could not set up btree block array\n"));
-
-	agb_ptr = curs->free_btree_blocks = curs->btree_blocks;
-
-	for (j = 0; j < curs->num_free_blocks; j++, agb_ptr++)
-		*agb_ptr = NULLAGBLOCK;
-
-	/*
-	 * grab the smallest extent and use it up, then get the
-	 * next smallest.  This mimics the init_*_cursor code.
-	 */
-	ext_ptr =  findfirst_bcnt_extent(agno);
-
-	agb_ptr = curs->btree_blocks;
-
-	/*
-	 * set up the free block array
-	 */
-	while (blocks_allocated < big_extent_len)  {
-		if (!ext_ptr)
-			do_error(
-_("error - not enough free space in filesystem\n"));
-		/*
-		 * use up the extent we've got
-		 */
-		for (u = 0; u < ext_ptr->ex_blockcount &&
-				blocks_allocated < big_extent_len; u++)  {
-			ASSERT(agb_ptr < curs->btree_blocks
-					+ curs->num_tot_blocks);
-			*agb_ptr++ = ext_ptr->ex_startblock + u;
-			blocks_allocated++;
-		}
-
-		error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, u,
-				curs->owner);
-		if (error)
-			do_error(_("could not set up btree rmaps: %s\n"),
-				strerror(-error));
-
-		/*
-		 * if we only used part of this last extent, then we
-		 * need only to reset the extent in the extent
-		 * trees and we're done
-		 */
-		if (u < ext_ptr->ex_blockcount)  {
-			big_extent_start = ext_ptr->ex_startblock + u;
-			big_extent_len = ext_ptr->ex_blockcount - u;
-
-			ASSERT(big_extent_len > 0);
-
-			bno_ext_ptr = find_bno_extent(agno,
-						ext_ptr->ex_startblock);
-			ASSERT(bno_ext_ptr != NULL);
-			get_bno_extent(agno, bno_ext_ptr);
-			release_extent_tree_node(bno_ext_ptr);
-
-			ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
-					ext_ptr->ex_blockcount);
-			release_extent_tree_node(ext_ptr);
-#ifdef XR_BLD_FREE_TRACE
-			fprintf(stderr, "releasing extent: %u [%u %u]\n",
-				agno, ext_ptr->ex_startblock,
-				ext_ptr->ex_blockcount);
-			fprintf(stderr, "blocks_allocated = %d\n",
-				blocks_allocated);
-#endif
-
-			add_bno_extent(agno, big_extent_start, big_extent_len);
-			add_bcnt_extent(agno, big_extent_start, big_extent_len);
-
-			return;
-		}
-		/*
-		 * delete the used-up extent from both extent trees and
-		 * find next biggest extent
-		 */
-#ifdef XR_BLD_FREE_TRACE
-		fprintf(stderr, "releasing extent: %u [%u %u]\n",
-			agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
-#endif
-		bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
-		ASSERT(bno_ext_ptr != NULL);
-		get_bno_extent(agno, bno_ext_ptr);
-		release_extent_tree_node(bno_ext_ptr);
-
-		ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
-				ext_ptr->ex_blockcount);
-		ASSERT(ext_ptr != NULL);
-		release_extent_tree_node(ext_ptr);
-
-		ext_ptr = findfirst_bcnt_extent(agno);
-	}
-#ifdef XR_BLD_FREE_TRACE
-	fprintf(stderr, "blocks_allocated = %d\n",
-		blocks_allocated);
-#endif
-}
-
 /*
  * Estimate proper slack values for a btree that's being reloaded.
  *
@@ -481,34 +293,6 @@ rebuild_alloc_block(
 	return xrep_newbt_alloc_block(cur, &btr->newbt, ptr);
 }
 
-static void
-write_cursor(bt_status_t *curs)
-{
-	int i;
-
-	for (i = 0; i < curs->num_levels; i++)  {
-#if defined(XR_BLD_FREE_TRACE) || defined(XR_BLD_INO_TRACE)
-		fprintf(stderr, "writing bt block %u\n", curs->level[i].agbno);
-#endif
-		if (curs->level[i].prev_buf_p != NULL)  {
-			ASSERT(curs->level[i].prev_agbno != NULLAGBLOCK);
-#if defined(XR_BLD_FREE_TRACE) || defined(XR_BLD_INO_TRACE)
-			fprintf(stderr, "writing bt prev block %u\n",
-						curs->level[i].prev_agbno);
-#endif
-			libxfs_writebuf(curs->level[i].prev_buf_p, 0);
-		}
-		libxfs_writebuf(curs->level[i].buf_p, 0);
-	}
-}
-
-static void
-finish_cursor(bt_status_t *curs)
-{
-	ASSERT(curs->num_free_blocks == 0);
-	free(curs->btree_blocks);
-}
-
 /*
  * Scoop up leftovers from a rebuild cursor for later freeing, then free the
  * rebuild context.
@@ -537,30 +321,6 @@ _("Insufficient memory saving lost blocks.\n"));
 	xrep_newbt_destroy(&btr->newbt, 0);
 }
 
-/* Map btnum to buffer ops for the types that need it. */
-static const struct xfs_buf_ops *
-btnum_to_ops(
-	xfs_btnum_t	btnum)
-{
-	switch (btnum) {
-	case XFS_BTNUM_BNO:
-		return &xfs_bnobt_buf_ops;
-	case XFS_BTNUM_CNT:
-		return &xfs_cntbt_buf_ops;
-	case XFS_BTNUM_INO:
-		return &xfs_inobt_buf_ops;
-	case XFS_BTNUM_FINO:
-		return &xfs_finobt_buf_ops;
-	case XFS_BTNUM_RMAP:
-		return &xfs_rmapbt_buf_ops;
-	case XFS_BTNUM_REFC:
-		return &xfs_refcountbt_buf_ops;
-	default:
-		ASSERT(0);
-		return NULL;
-	}
-}
-
 /*
  * Free Space Btrees
  *


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 9/9] xfs_repair: track blocks lost during btree construction via extents
  2020-01-01  1:21 [PATCH v2 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
                   ` (7 preceding siblings ...)
  2020-01-01  1:22 ` [PATCH 8/9] xfs_repair: remove old btree rebuild support code Darrick J. Wong
@ 2020-01-01  1:22 ` Darrick J. Wong
  8 siblings, 0 replies; 25+ messages in thread
From: Darrick J. Wong @ 2020-01-01  1:22 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Use extent records (not just raw fsbs) to track blocks that were lost
during btree construction.  This shrinks the lost-block slab and lets
inject_lost_blocks() free each leftover run in a single transaction
instead of one transaction per block.
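
The change in granularity, sketched with the types introduced below
(error handling trimmed):

	/* before: one slab record (and later one free transaction) per block */
	for (fsb = resv->fsbno + resv->used; fsb < resv->fsbno + resv->len; fsb++)
		slab_add(lost_fsbs, &fsb);

	/* after: one slab record covering the whole leftover run */
	struct lost_fsb	lost = {
		.fsbno	= resv->fsbno + resv->used,
		.len	= resv->len - resv->used,
	};
	slab_add(lost_fsbs, &lost);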

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |   60 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 26 deletions(-)


diff --git a/repair/phase5.c b/repair/phase5.c
index 94fc17d8..1519a372 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -20,6 +20,11 @@
 #include "rmap.h"
 #include "bload.h"
 
+struct lost_fsb {
+	xfs_fsblock_t		fsbno;
+	xfs_extlen_t		len;
+};
+
 struct bt_rebuild {
 	struct xrep_newbt	newbt;
 	struct xfs_btree_bload	bload;
@@ -301,21 +306,24 @@ static void
 finish_rebuild(
 	struct xfs_mount	*mp,
 	struct bt_rebuild	*btr,
-	struct xfs_slab		*lost_fsb)
+	struct xfs_slab		*lost_fsbs)
 {
 	struct xrep_newbt_resv	*resv, *n;
 
 	for_each_xrep_newbt_reservation(&btr->newbt, resv, n) {
-		while (resv->used < resv->len) {
-			xfs_fsblock_t	fsb = resv->fsbno + resv->used;
-			int		error;
+		struct lost_fsb	lost;
+		int		error;
 
-			error = slab_add(lost_fsb, &fsb);
-			if (error)
-				do_error(
+		if (resv->used == resv->len)
+			continue;
+
+		lost.fsbno = resv->fsbno + resv->used;
+		lost.len = resv->len - resv->used;
+		error = slab_add(lost_fsbs, &lost);
+		if (error)
+			do_error(
 _("Insufficient memory saving lost blocks.\n"));
-			resv->used++;
-		}
+		resv->used = resv->len;
 	}
 
 	xrep_newbt_destroy(&btr->newbt, 0);
@@ -1039,7 +1047,7 @@ build_agf_agfl(
 	int			lostblocks,	/* # blocks that will be lost */
 	struct bt_rebuild	*btr_rmap,
 	struct bt_rebuild	*btr_refcount,
-	struct xfs_slab		*lost_fsb)
+	struct xfs_slab		*lost_fsbs)
 {
 	struct extent_tree_node	*ext_ptr;
 	struct xfs_buf		*agf_buf, *agfl_buf;
@@ -1238,7 +1246,7 @@ static void
 phase5_func(
 	struct xfs_mount	*mp,
 	xfs_agnumber_t		agno,
-	struct xfs_slab		*lost_fsb)
+	struct xfs_slab		*lost_fsbs)
 {
 	struct repair_ctx	sc = { .mp = mp, };
 	struct agi_stat		agi_stat = {0,};
@@ -1373,7 +1381,7 @@ phase5_func(
 	 * set up agf and agfl
 	 */
 	build_agf_agfl(mp, agno, &btr_bno, &btr_cnt, freeblks1, extra_blocks,
-			&btr_rmap, &btr_refcount, lost_fsb);
+			&btr_rmap, &btr_refcount, lost_fsbs);
 
 	/*
 	 * build inode allocation trees.
@@ -1388,15 +1396,15 @@ phase5_func(
 	/*
 	 * tear down cursors
 	 */
-	finish_rebuild(mp, &btr_bno, lost_fsb);
-	finish_rebuild(mp, &btr_cnt, lost_fsb);
-	finish_rebuild(mp, &btr_ino, lost_fsb);
+	finish_rebuild(mp, &btr_bno, lost_fsbs);
+	finish_rebuild(mp, &btr_cnt, lost_fsbs);
+	finish_rebuild(mp, &btr_ino, lost_fsbs);
 	if (xfs_sb_version_hasfinobt(&mp->m_sb))
-		finish_rebuild(mp, &btr_fino, lost_fsb);
+		finish_rebuild(mp, &btr_fino, lost_fsbs);
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-		finish_rebuild(mp, &btr_rmap, lost_fsb);
+		finish_rebuild(mp, &btr_rmap, lost_fsbs);
 	if (xfs_sb_version_hasreflink(&mp->m_sb))
-		finish_rebuild(mp, &btr_refcount, lost_fsb);
+		finish_rebuild(mp, &btr_refcount, lost_fsbs);
 
 	/*
 	 * release the incore per-AG bno/bcnt trees so
@@ -1416,19 +1424,19 @@ inject_lost_blocks(
 {
 	struct xfs_trans	*tp = NULL;
 	struct xfs_slab_cursor	*cur = NULL;
-	xfs_fsblock_t		*fsb;
+	struct lost_fsb		*lost;
 	int			error;
 
 	error = init_slab_cursor(lost_fsbs, NULL, &cur);
 	if (error)
 		return error;
 
-	while ((fsb = pop_slab_cursor(cur)) != NULL) {
+	while ((lost = pop_slab_cursor(cur)) != NULL) {
 		error = -libxfs_trans_alloc_rollable(mp, 16, &tp);
 		if (error)
 			goto out_cancel;
 
-		error = -libxfs_free_extent(tp, *fsb, 1,
+		error = -libxfs_free_extent(tp, lost->fsbno, lost->len,
 				&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
 		if (error)
 			goto out_cancel;
@@ -1449,7 +1457,7 @@ inject_lost_blocks(
 void
 phase5(xfs_mount_t *mp)
 {
-	struct xfs_slab		*lost_fsb;
+	struct xfs_slab		*lost_fsbs;
 	xfs_agnumber_t		agno;
 	int			error;
 
@@ -1492,12 +1500,12 @@ phase5(xfs_mount_t *mp)
 	if (sb_fdblocks_ag == NULL)
 		do_error(_("cannot alloc sb_fdblocks_ag buffers\n"));
 
-	error = init_slab(&lost_fsb, sizeof(xfs_fsblock_t));
+	error = init_slab(&lost_fsbs, sizeof(struct lost_fsb));
 	if (error)
 		do_error(_("cannot alloc lost block slab\n"));
 
 	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)
-		phase5_func(mp, agno, lost_fsb);
+		phase5_func(mp, agno, lost_fsbs);
 
 	print_final_rpt();
 
@@ -1540,10 +1548,10 @@ _("unable to add AG %u reverse-mapping data to btree.\n"), agno);
 	 * Put blocks that were unnecessarily reserved for btree
 	 * reconstruction back into the filesystem free space data.
 	 */
-	error = inject_lost_blocks(mp, lost_fsb);
+	error = inject_lost_blocks(mp, lost_fsbs);
 	if (error)
 		do_error(_("Unable to reinsert lost blocks into filesystem.\n"));
-	free_slab(&lost_fsb);
+	free_slab(&lost_fsbs);
 
 	bad_ino_btree = 0;
 


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-06-01 12:03           ` Brian Foster
@ 2020-06-02  0:12             ` Darrick J. Wong
  0 siblings, 0 replies; 25+ messages in thread
From: Darrick J. Wong @ 2020-06-02  0:12 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Mon, Jun 01, 2020 at 08:03:38AM -0400, Brian Foster wrote:
> On Fri, May 29, 2020 at 02:01:59PM -0700, Darrick J. Wong wrote:
> > On Thu, May 28, 2020 at 11:08:36AM -0400, Brian Foster wrote:
> > > On Wed, May 27, 2020 at 03:34:24PM -0700, Darrick J. Wong wrote:
> > > > On Wed, May 27, 2020 at 08:15:31AM -0400, Brian Foster wrote:
> > > > > On Tue, May 19, 2020 at 06:50:49PM -0700, Darrick J. Wong wrote:
> > > > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > > 
> > > > > > Port the new btree staging context and related block reservation helper
> > > > > > code from the kernel to repair.  We'll use this in subsequent patches to
> > > > > > implement btree bulk loading.
> > > > > > 
> > > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > > ---
> > > > > >  include/libxfs.h         |    1 
> > > > > >  libxfs/libxfs_api_defs.h |    2 
> > > > > >  repair/Makefile          |    4 -
> > > > > >  repair/bload.c           |  303 ++++++++++++++++++++++++++++++++++++++++++++++
> > > > > >  repair/bload.h           |   77 ++++++++++++
> > > > > >  repair/xfs_repair.c      |   17 +++
> > > > > >  6 files changed, 402 insertions(+), 2 deletions(-)
> > > > > >  create mode 100644 repair/bload.c
> > > > > >  create mode 100644 repair/bload.h
> > > > > > 
> > > > > > 
> > > > > ...
> > > > > > diff --git a/repair/bload.c b/repair/bload.c
> > > > > > new file mode 100644
> > > > > > index 00000000..9bc17468
> > > > > > --- /dev/null
> > > > > > +++ b/repair/bload.c
> > > > > > @@ -0,0 +1,303 @@
> > > > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > > > +/*
> > > > > > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > > > > > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > > + */
> > > > > > +#include <libxfs.h>
> > > > > > +#include "bload.h"
> > > > > > +
> > > > > > +#define trace_xrep_newbt_claim_block(...)	((void) 0)
> > > > > > +#define trace_xrep_newbt_free_blocks(...)	((void) 0)
> > > > > > +
> > > > > > +int bload_leaf_slack = -1;
> > > > > > +int bload_node_slack = -1;
> > > > > > +
> > > > > > +/* Ported routines from fs/xfs/scrub/repair.c */
> > > > > > +
> > > > > 
> > > > > Looks mostly straightforward, but I'll have to come back to this as I
> > > > > get to the code that uses it later in the series. In the meantime, I see
> > > > > some of these helpers in scrub/repair.c but not others. Are there
> > > > > references to other routines that are intended to be copies from kernel
> > > > > code?
> > > > 
> > > > Hm.  I might not understand the question, but in general the code should
> > > > be fairly similar to the kernel functions.  The biggest differences are
> > > > (a) that whole libxfs error code mess, (b) the much simpler repair_ctx
> > > > structure, and (c) the fact that repair doesn't bother with EFIs to
> > > > automatically reap blocks.
> > > > 
> > > > So... the ten functions you see here do the same things as their kernel
> > > > counterparts, but they get to do it in the much simpler userspace
> > > > environment.
> > > > 
> > > 
> > > Right.. I was able to find the first function (xrep_roll_ag_trans())
> > > easily in the kernel because it has the same name. The next one or two
> > > (i.e., xrep_newbt_*()) I couldn't find and then gave up. Are they
> > > renamed? Unmerged?
> > 
> > Ahh, silly me, the rest are as yet unmerged components of the online
> > repair code.  Maybe it makes more sense to drop the "Ported routines
> > from XXXX" comment, particularly since it probably will be easier to
> > merge the xfs_repair series first, which is /much/ smaller in scope.
> > 
> 
> Probably so. It's just going to cause confusion if this is not intended
> to land at the same time as the kernel code.

<nod> I also realized while re-examining the two codebases that the
xrep_newbt code *isn't* going to converge.  The kernel needs extra bits
for smuggling in EFIs, and userspace doesn't ever need the ability to
bhold the AG header buffers to roll a transaction.

So in effect I dropped a bunch of code and re-prefixed the surviving
code so that this patch will get quite a bit smaller.  I'll try to send
a new revision tomorrow.
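
(For reference, the bhold-and-roll pattern in question, condensed from
the xrep_roll_ag_trans() hunk quoted below; the agi/agfl buffers get the
same treatment and are elided here:)

	libxfs_trans_bhold(sc->tp, sc->agf_bp);	/* keep the AGF locked */
	error = -libxfs_trans_roll(&sc->tp);	/* commit, get a new tp */
	if (error)
		return error;
	libxfs_trans_bjoin(sc->tp, sc->agf_bp);	/* rejoin to the new tp */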

--D

> > > > The other functions in scrub/repair.c that didn't get ported are either
> > > > for other types of repairs or exist to support the in-kernel code and
> > > > aren't needed here.
> > > > 
> > > 
> > > Sure, I'm just curious how to identify the source of the ones that are.
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/tree/fs/xfs/scrub/repair.c?h=djwong-wtf
> > 
> > Is a fairly recent snapshot of what the kernel version looks like.
> > 
> 
> Thanks.
> 
> Brian
> 
> > --D
> > 
> > > 
> > > Brian
> > > 
> > > > --D
> > > > 
> > > > > Brian
> > > > > 
> > > > > > +/*
> > > > > > + * Roll a transaction, keeping the AG headers locked and reinitializing
> > > > > > + * the btree cursors.
> > > > > > + */
> > > > > > +int
> > > > > > +xrep_roll_ag_trans(
> > > > > > +	struct repair_ctx	*sc)
> > > > > > +{
> > > > > > +	int			error;
> > > > > > +
> > > > > > +	/* Keep the AG header buffers locked so we can keep going. */
> > > > > > +	if (sc->agi_bp)
> > > > > > +		libxfs_trans_bhold(sc->tp, sc->agi_bp);
> > > > > > +	if (sc->agf_bp)
> > > > > > +		libxfs_trans_bhold(sc->tp, sc->agf_bp);
> > > > > > +	if (sc->agfl_bp)
> > > > > > +		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * Roll the transaction.  We still own the buffer and the buffer lock
> > > > > > +	 * regardless of whether or not the roll succeeds.  If the roll fails,
> > > > > > +	 * the buffers will be released during teardown on our way out of the
> > > > > > +	 * kernel.  If it succeeds, we join them to the new transaction and
> > > > > > +	 * move on.
> > > > > > +	 */
> > > > > > +	error = -libxfs_trans_roll(&sc->tp);
> > > > > > +	if (error)
> > > > > > +		return error;
> > > > > > +
> > > > > > +	/* Join AG headers to the new transaction. */
> > > > > > +	if (sc->agi_bp)
> > > > > > +		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
> > > > > > +	if (sc->agf_bp)
> > > > > > +		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
> > > > > > +	if (sc->agfl_bp)
> > > > > > +		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
> > > > > > +
> > > > > > +	return 0;
> > > > > > +}
> > > > > > +
> > > > > > +/* Initialize accounting resources for staging a new AG btree. */
> > > > > > +void
> > > > > > +xrep_newbt_init_ag(
> > > > > > +	struct xrep_newbt		*xnr,
> > > > > > +	struct repair_ctx		*sc,
> > > > > > +	const struct xfs_owner_info	*oinfo,
> > > > > > +	xfs_fsblock_t			alloc_hint,
> > > > > > +	enum xfs_ag_resv_type		resv)
> > > > > > +{
> > > > > > +	memset(xnr, 0, sizeof(struct xrep_newbt));
> > > > > > +	xnr->sc = sc;
> > > > > > +	xnr->oinfo = *oinfo; /* structure copy */
> > > > > > +	xnr->alloc_hint = alloc_hint;
> > > > > > +	xnr->resv = resv;
> > > > > > +	INIT_LIST_HEAD(&xnr->resv_list);
> > > > > > +}
> > > > > > +
> > > > > > +/* Initialize accounting resources for staging a new inode fork btree. */
> > > > > > +void
> > > > > > +xrep_newbt_init_inode(
> > > > > > +	struct xrep_newbt		*xnr,
> > > > > > +	struct repair_ctx		*sc,
> > > > > > +	int				whichfork,
> > > > > > +	const struct xfs_owner_info	*oinfo)
> > > > > > +{
> > > > > > +	xrep_newbt_init_ag(xnr, sc, oinfo,
> > > > > > +			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
> > > > > > +			XFS_AG_RESV_NONE);
> > > > > > +	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
> > > > > > +	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
> > > > > > +}
> > > > > > +
> > > > > > +/*
> > > > > > + * Initialize accounting resources for staging a new btree.  Callers are
> > > > > > + * expected to add their own reservations (and clean them up) manually.
> > > > > > + */
> > > > > > +void
> > > > > > +xrep_newbt_init_bare(
> > > > > > +	struct xrep_newbt		*xnr,
> > > > > > +	struct repair_ctx		*sc)
> > > > > > +{
> > > > > > +	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
> > > > > > +			XFS_AG_RESV_NONE);
> > > > > > +}
> > > > > > +
> > > > > > +/* Designate specific blocks to be used to build our new btree. */
> > > > > > +int
> > > > > > +xrep_newbt_add_blocks(
> > > > > > +	struct xrep_newbt	*xnr,
> > > > > > +	xfs_fsblock_t		fsbno,
> > > > > > +	xfs_extlen_t		len)
> > > > > > +{
> > > > > > +	struct xrep_newbt_resv	*resv;
> > > > > > +
> > > > > > +	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
> > > > > > +	if (!resv)
> > > > > > +		return ENOMEM;
> > > > > > +
> > > > > > +	INIT_LIST_HEAD(&resv->list);
> > > > > > +	resv->fsbno = fsbno;
> > > > > > +	resv->len = len;
> > > > > > +	resv->used = 0;
> > > > > > +	list_add_tail(&resv->list, &xnr->resv_list);
> > > > > > +	return 0;
> > > > > > +}
> > > > > > +
> > > > > > +/* Reserve disk space for our new btree. */
> > > > > > +int
> > > > > > +xrep_newbt_alloc_blocks(
> > > > > > +	struct xrep_newbt	*xnr,
> > > > > > +	uint64_t		nr_blocks)
> > > > > > +{
> > > > > > +	struct repair_ctx	*sc = xnr->sc;
> > > > > > +	xfs_alloctype_t		type;
> > > > > > +	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
> > > > > > +	int			error = 0;
> > > > > > +
> > > > > > +	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
> > > > > > +
> > > > > > +	while (nr_blocks > 0 && !error) {
> > > > > > +		struct xfs_alloc_arg	args = {
> > > > > > +			.tp		= sc->tp,
> > > > > > +			.mp		= sc->mp,
> > > > > > +			.type		= type,
> > > > > > +			.fsbno		= alloc_hint,
> > > > > > +			.oinfo		= xnr->oinfo,
> > > > > > +			.minlen		= 1,
> > > > > > +			.maxlen		= nr_blocks,
> > > > > > +			.prod		= 1,
> > > > > > +			.resv		= xnr->resv,
> > > > > > +		};
> > > > > > +
> > > > > > +		error = -libxfs_alloc_vextent(&args);
> > > > > > +		if (error)
> > > > > > +			return error;
> > > > > > +		if (args.fsbno == NULLFSBLOCK)
> > > > > > +			return ENOSPC;
> > > > > > +
> > > > > > +		/* We don't have real EFIs here so skip that. */
> > > > > > +
> > > > > > +		error = xrep_newbt_add_blocks(xnr, args.fsbno, args.len);
> > > > > > +		if (error)
> > > > > > +			break;
> > > > > > +
> > > > > > +		nr_blocks -= args.len;
> > > > > > +		alloc_hint = args.fsbno + args.len - 1;
> > > > > > +
> > > > > > +		if (sc->ip)
> > > > > > +			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> > > > > > +		else
> > > > > > +			error = xrep_roll_ag_trans(sc);
> > > > > > +	}
> > > > > > +
> > > > > > +	return error;
> > > > > > +}
> > > > > > +
> > > > > > +/*
> > > > > > + * Release blocks that were reserved for a btree repair.  If the repair
> > > > > > + * succeeded then we log deferred frees for unused blocks.  Otherwise, we try
> > > > > > + * to free the extents immediately to roll the filesystem back to where it was
> > > > > > + * before we started.
> > > > > > + */
> > > > > > +static inline int
> > > > > > +xrep_newbt_destroy_reservation(
> > > > > > +	struct xrep_newbt	*xnr,
> > > > > > +	struct xrep_newbt_resv	*resv,
> > > > > > +	bool			cancel_repair)
> > > > > > +{
> > > > > > +	struct repair_ctx	*sc = xnr->sc;
> > > > > > +
> > > > > > +	if (cancel_repair) {
> > > > > > +		int		error;
> > > > > > +
> > > > > > +		/* Free the extent then roll the transaction. */
> > > > > > +		error = -libxfs_free_extent(sc->tp, resv->fsbno, resv->len,
> > > > > > +				&xnr->oinfo, xnr->resv);
> > > > > > +		if (error)
> > > > > > +			return error;
> > > > > > +
> > > > > > +		if (sc->ip)
> > > > > > +			return -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> > > > > > +		return xrep_roll_ag_trans(sc);
> > > > > > +	}
> > > > > > +
> > > > > > +	/* We don't have EFIs here so skip the EFD. */
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * Use the deferred freeing mechanism to schedule for deletion any
> > > > > > +	 * blocks we didn't use to rebuild the tree.  This enables us to log
> > > > > > +	 * them all in the same transaction as the root change.
> > > > > > +	 */
> > > > > > +	resv->fsbno += resv->used;
> > > > > > +	resv->len -= resv->used;
> > > > > > +	resv->used = 0;
> > > > > > +
> > > > > > +	if (resv->len == 0)
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	trace_xrep_newbt_free_blocks(sc->mp,
> > > > > > +			XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
> > > > > > +			XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
> > > > > > +			resv->len, xnr->oinfo.oi_owner);
> > > > > > +
> > > > > > +	__xfs_bmap_add_free(sc->tp, resv->fsbno, resv->len, &xnr->oinfo, true);
> > > > > > +
> > > > > > +	return 0;
> > > > > > +}
> > > > > > +
> > > > > > +/* Free all the accounting info and disk space we reserved for a new btree. */
> > > > > > +void
> > > > > > +xrep_newbt_destroy(
> > > > > > +	struct xrep_newbt	*xnr,
> > > > > > +	int			error)
> > > > > > +{
> > > > > > +	struct repair_ctx	*sc = xnr->sc;
> > > > > > +	struct xrep_newbt_resv	*resv, *n;
> > > > > > +	int			err2;
> > > > > > +
> > > > > > +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> > > > > > +		err2 = xrep_newbt_destroy_reservation(xnr, resv, error != 0);
> > > > > > +		if (err2)
> > > > > > +			goto junkit;
> > > > > > +
> > > > > > +		list_del(&resv->list);
> > > > > > +		kmem_free(resv);
> > > > > > +	}
> > > > > > +
> > > > > > +junkit:
> > > > > > +	/*
> > > > > > +	 * If we still have reservations attached to @newbt, cleanup must have
> > > > > > +	 * failed and the filesystem is about to go down.  Clean up the incore
> > > > > > +	 * reservations.
> > > > > > +	 */
> > > > > > +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> > > > > > +		list_del(&resv->list);
> > > > > > +		kmem_free(resv);
> > > > > > +	}
> > > > > > +
> > > > > > +	if (sc->ip) {
> > > > > > +		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
> > > > > > +		xnr->ifake.if_fork = NULL;
> > > > > > +	}
> > > > > > +}
> > > > > > +
> > > > > > +/* Feed one of the reserved btree blocks to the bulk loader. */
> > > > > > +int
> > > > > > +xrep_newbt_claim_block(
> > > > > > +	struct xfs_btree_cur	*cur,
> > > > > > +	struct xrep_newbt	*xnr,
> > > > > > +	union xfs_btree_ptr	*ptr)
> > > > > > +{
> > > > > > +	struct xrep_newbt_resv	*resv;
> > > > > > +	xfs_fsblock_t		fsb;
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * The first item in the list should always have a free block unless
> > > > > > +	 * we're completely out.
> > > > > > +	 */
> > > > > > +	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
> > > > > > +	if (resv->used == resv->len)
> > > > > > +		return ENOSPC;
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * Peel off a block from the start of the reservation.  We allocate
> > > > > > +	 * blocks in order to place blocks on disk in increasing record or key
> > > > > > +	 * order.  The block reservations tend to end up on the list in
> > > > > > +	 * decreasing order, which hopefully results in leaf blocks ending up
> > > > > > +	 * together.
> > > > > > +	 */
> > > > > > +	fsb = resv->fsbno + resv->used;
> > > > > > +	resv->used++;
> > > > > > +
> > > > > > +	/* If we used all the blocks in this reservation, move it to the end. */
> > > > > > +	if (resv->used == resv->len)
> > > > > > +		list_move_tail(&resv->list, &xnr->resv_list);
> > > > > > +
> > > > > > +	trace_xrep_newbt_claim_block(cur->bc_mp,
> > > > > > +			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
> > > > > > +			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
> > > > > > +			1, xnr->oinfo.oi_owner);
> > > > > > +
> > > > > > +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
> > > > > > +		ptr->l = cpu_to_be64(fsb);
> > > > > > +	else
> > > > > > +		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
> > > > > > +	return 0;
> > > > > > +}
> > > > > > diff --git a/repair/bload.h b/repair/bload.h
> > > > > > new file mode 100644
> > > > > > index 00000000..020c4834
> > > > > > --- /dev/null
> > > > > > +++ b/repair/bload.h
> > > > > > @@ -0,0 +1,77 @@
> > > > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > > > +/*
> > > > > > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > > > > > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > > + */
> > > > > > +#ifndef __XFS_REPAIR_BLOAD_H__
> > > > > > +#define __XFS_REPAIR_BLOAD_H__
> > > > > > +
> > > > > > +extern int bload_leaf_slack;
> > > > > > +extern int bload_node_slack;
> > > > > > +
> > > > > > +struct repair_ctx {
> > > > > > +	struct xfs_mount	*mp;
> > > > > > +	struct xfs_inode	*ip;
> > > > > > +	struct xfs_trans	*tp;
> > > > > > +
> > > > > > +	struct xfs_buf		*agi_bp;
> > > > > > +	struct xfs_buf		*agf_bp;
> > > > > > +	struct xfs_buf		*agfl_bp;
> > > > > > +};
> > > > > > +
> > > > > > +struct xrep_newbt_resv {
> > > > > > +	/* Link to list of extents that we've reserved. */
> > > > > > +	struct list_head	list;
> > > > > > +
> > > > > > +	/* FSB of the block we reserved. */
> > > > > > +	xfs_fsblock_t		fsbno;
> > > > > > +
> > > > > > +	/* Length of the reservation. */
> > > > > > +	xfs_extlen_t		len;
> > > > > > +
> > > > > > +	/* How much of this reservation we've used. */
> > > > > > +	xfs_extlen_t		used;
> > > > > > +};
> > > > > > +
> > > > > > +struct xrep_newbt {
> > > > > > +	struct repair_ctx	*sc;
> > > > > > +
> > > > > > +	/* List of extents that we've reserved. */
> > > > > > +	struct list_head	resv_list;
> > > > > > +
> > > > > > +	/* Fake root for new btree. */
> > > > > > +	union {
> > > > > > +		struct xbtree_afakeroot	afake;
> > > > > > +		struct xbtree_ifakeroot	ifake;
> > > > > > +	};
> > > > > > +
> > > > > > +	/* rmap owner of these blocks */
> > > > > > +	struct xfs_owner_info	oinfo;
> > > > > > +
> > > > > > +	/* The last reservation we allocated from. */
> > > > > > +	struct xrep_newbt_resv	*last_resv;
> > > > > > +
> > > > > > +	/* Allocation hint */
> > > > > > +	xfs_fsblock_t		alloc_hint;
> > > > > > +
> > > > > > +	/* per-ag reservation type */
> > > > > > +	enum xfs_ag_resv_type	resv;
> > > > > > +};
> > > > > > +
> > > > > > +#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
> > > > > > +	list_for_each_entry_safe((resv), (n), &(xnr)->resv_list, list)
> > > > > > +
> > > > > > +void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct repair_ctx *sc);
> > > > > > +void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct repair_ctx *sc,
> > > > > > +		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
> > > > > > +		enum xfs_ag_resv_type resv);
> > > > > > +void xrep_newbt_init_inode(struct xrep_newbt *xnr, struct repair_ctx *sc,
> > > > > > +		int whichfork, const struct xfs_owner_info *oinfo);
> > > > > > +int xrep_newbt_add_blocks(struct xrep_newbt *xnr, xfs_fsblock_t fsbno,
> > > > > > +		xfs_extlen_t len);
> > > > > > +int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
> > > > > > +void xrep_newbt_destroy(struct xrep_newbt *xnr, int error);
> > > > > > +int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
> > > > > > +		union xfs_btree_ptr *ptr);
> > > > > > +
> > > > > > +#endif /* __XFS_REPAIR_BLOAD_H__ */
> > > > > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > > > > index 9d72fa8e..8fbd3649 100644
> > > > > > --- a/repair/xfs_repair.c
> > > > > > +++ b/repair/xfs_repair.c
> > > > > > @@ -24,6 +24,7 @@
> > > > > >  #include "rmap.h"
> > > > > >  #include "libfrog/fsgeom.h"
> > > > > >  #include "libfrog/platform.h"
> > > > > > +#include "bload.h"
> > > > > >  
> > > > > >  /*
> > > > > >   * option tables for getsubopt calls
> > > > > > @@ -39,6 +40,8 @@ enum o_opt_nums {
> > > > > >  	AG_STRIDE,
> > > > > >  	FORCE_GEO,
> > > > > >  	PHASE2_THREADS,
> > > > > > +	BLOAD_LEAF_SLACK,
> > > > > > +	BLOAD_NODE_SLACK,
> > > > > >  	O_MAX_OPTS,
> > > > > >  };
> > > > > >  
> > > > > > @@ -49,6 +52,8 @@ static char *o_opts[] = {
> > > > > >  	[AG_STRIDE]		= "ag_stride",
> > > > > >  	[FORCE_GEO]		= "force_geometry",
> > > > > >  	[PHASE2_THREADS]	= "phase2_threads",
> > > > > > +	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
> > > > > > +	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
> > > > > >  	[O_MAX_OPTS]		= NULL,
> > > > > >  };
> > > > > >  
> > > > > > @@ -260,6 +265,18 @@ process_args(int argc, char **argv)
> > > > > >  		_("-o phase2_threads requires a parameter\n"));
> > > > > >  					phase2_threads = (int)strtol(val, NULL, 0);
> > > > > >  					break;
> > > > > > +				case BLOAD_LEAF_SLACK:
> > > > > > +					if (!val)
> > > > > > +						do_abort(
> > > > > > +		_("-o debug_bload_leaf_slack requires a parameter\n"));
> > > > > > +					bload_leaf_slack = (int)strtol(val, NULL, 0);
> > > > > > +					break;
> > > > > > +				case BLOAD_NODE_SLACK:
> > > > > > +					if (!val)
> > > > > > +						do_abort(
> > > > > > +		_("-o debug_bload_node_slack requires a parameter\n"));
> > > > > > +					bload_node_slack = (int)strtol(val, NULL, 0);
> > > > > > +					break;
> > > > > >  				default:
> > > > > >  					unknown('o', val);
> > > > > >  					break;
> > > > > > 
> > > > > 
> > > > 
> > > 
> > 
> 

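For testing, the knobs quoted above are passed as ordinary -o suboptions
in the usual comma-separated getsubopt form, e.g. (hypothetical device
path):

	# force fully-packed leaf and node blocks during rebuild
	xfs_repair -o debug_bload_leaf_slack=0,debug_bload_node_slack=0 /dev/sdXN
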
^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-05-29 21:01         ` Darrick J. Wong
@ 2020-06-01 12:03           ` Brian Foster
  2020-06-02  0:12             ` Darrick J. Wong
  0 siblings, 1 reply; 25+ messages in thread
From: Brian Foster @ 2020-06-01 12:03 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Fri, May 29, 2020 at 02:01:59PM -0700, Darrick J. Wong wrote:
> On Thu, May 28, 2020 at 11:08:36AM -0400, Brian Foster wrote:
> > On Wed, May 27, 2020 at 03:34:24PM -0700, Darrick J. Wong wrote:
> > > On Wed, May 27, 2020 at 08:15:31AM -0400, Brian Foster wrote:
> > > > On Tue, May 19, 2020 at 06:50:49PM -0700, Darrick J. Wong wrote:
> > > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > 
> > > > > Port the new btree staging context and related block reservation helper
> > > > > code from the kernel to repair.  We'll use this in subsequent patches to
> > > > > implement btree bulk loading.
> > > > > 
> > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > ---
> > > > >  include/libxfs.h         |    1 
> > > > >  libxfs/libxfs_api_defs.h |    2 
> > > > >  repair/Makefile          |    4 -
> > > > >  repair/bload.c           |  303 ++++++++++++++++++++++++++++++++++++++++++++++
> > > > >  repair/bload.h           |   77 ++++++++++++
> > > > >  repair/xfs_repair.c      |   17 +++
> > > > >  6 files changed, 402 insertions(+), 2 deletions(-)
> > > > >  create mode 100644 repair/bload.c
> > > > >  create mode 100644 repair/bload.h
> > > > > 
> > > > > 
> > > > ...
> > > > > diff --git a/repair/bload.c b/repair/bload.c
> > > > > new file mode 100644
> > > > > index 00000000..9bc17468
> > > > > --- /dev/null
> > > > > +++ b/repair/bload.c
> > > > > @@ -0,0 +1,303 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > > +/*
> > > > > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > > > > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > + */
> > > > > +#include <libxfs.h>
> > > > > +#include "bload.h"
> > > > > +
> > > > > +#define trace_xrep_newbt_claim_block(...)	((void) 0)
> > > > > +#define trace_xrep_newbt_free_blocks(...)	((void) 0)
> > > > > +
> > > > > +int bload_leaf_slack = -1;
> > > > > +int bload_node_slack = -1;
> > > > > +
> > > > > +/* Ported routines from fs/xfs/scrub/repair.c */
> > > > > +
> > > > 
> > > > Looks mostly straightforward, but I'll have to come back to this as I
> > > > get to the code that uses it later in the series. In the meantime, I see
> > > > some of these helpers in scrub/repair.c but not others. Are there
> > > > references to other routines that are intended to be copies from kernel
> > > > code?
> > > 
> > > Hm.  I might not understand the question, but in general the code should
> > > be fairly similar to the kernel functions.  The biggest differences are
> > > (a) that whole libxfs error code mess, (b) the much simpler repair_ctx
> > > structure, and (c) the fact that repair doesn't bother with EFIs to
> > > automatically reap blocks.
> > > 
> > > So... the ten functions you see here do the same things as their kernel
> > > counterparts, but they get to do it in the much simpler userspace
> > > environment.
> > > 
> > 
> > Right.. I was able to find the first function (xrep_roll_ag_trans())
> > easily in the kernel because it has the same name. The next one or two
> > (i.e., xrep_newbt_*()) I couldn't find and then gave up. Are they
> > renamed? Unmerged?
> 
> Ahh, silly me, the rest are as yet unmerged components of the online
> repair code.  Maybe it makes more sense to drop the "Ported routines
> from XXXX" comment, particularly since it probably will be easier to
> merge the xfs_repair series first, which is /much/ smaller in scope.
> 

Probably so. It's just going to cause confusion if this is not intended
to land at the same time as the kernel code.

> > > The other functions in scrub/repair.c that didn't get ported are either
> > > for other types of repairs or exist to support the in-kernel code and
> > > aren't needed here.
> > > 
> > 
> > Sure, I'm just curious how to identify the source of the ones that are.
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/tree/fs/xfs/scrub/repair.c?h=djwong-wtf
> 
> Is a fairly recent snapshot of what the kernel version looks like.
> 

Thanks.

Brian

> --D
> 
> > 
> > Brian
> > 
> > > --D
> > > 
> > > > Brian
> > > > 
> > > > > +/*
> > > > > + * Roll a transaction, keeping the AG headers locked and reinitializing
> > > > > + * the btree cursors.
> > > > > + */
> > > > > +int
> > > > > +xrep_roll_ag_trans(
> > > > > +	struct repair_ctx	*sc)
> > > > > +{
> > > > > +	int			error;
> > > > > +
> > > > > +	/* Keep the AG header buffers locked so we can keep going. */
> > > > > +	if (sc->agi_bp)
> > > > > +		libxfs_trans_bhold(sc->tp, sc->agi_bp);
> > > > > +	if (sc->agf_bp)
> > > > > +		libxfs_trans_bhold(sc->tp, sc->agf_bp);
> > > > > +	if (sc->agfl_bp)
> > > > > +		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
> > > > > +
> > > > > +	/*
> > > > > +	 * Roll the transaction.  We still own the buffer and the buffer lock
> > > > > +	 * regardless of whether or not the roll succeeds.  If the roll fails,
> > > > > +	 * the buffers will be released during teardown on our way out of the
> > > > > +	 * kernel.  If it succeeds, we join them to the new transaction and
> > > > > +	 * move on.
> > > > > +	 */
> > > > > +	error = -libxfs_trans_roll(&sc->tp);
> > > > > +	if (error)
> > > > > +		return error;
> > > > > +
> > > > > +	/* Join AG headers to the new transaction. */
> > > > > +	if (sc->agi_bp)
> > > > > +		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
> > > > > +	if (sc->agf_bp)
> > > > > +		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
> > > > > +	if (sc->agfl_bp)
> > > > > +		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +/* Initialize accounting resources for staging a new AG btree. */
> > > > > +void
> > > > > +xrep_newbt_init_ag(
> > > > > +	struct xrep_newbt		*xnr,
> > > > > +	struct repair_ctx		*sc,
> > > > > +	const struct xfs_owner_info	*oinfo,
> > > > > +	xfs_fsblock_t			alloc_hint,
> > > > > +	enum xfs_ag_resv_type		resv)
> > > > > +{
> > > > > +	memset(xnr, 0, sizeof(struct xrep_newbt));
> > > > > +	xnr->sc = sc;
> > > > > +	xnr->oinfo = *oinfo; /* structure copy */
> > > > > +	xnr->alloc_hint = alloc_hint;
> > > > > +	xnr->resv = resv;
> > > > > +	INIT_LIST_HEAD(&xnr->resv_list);
> > > > > +}
> > > > > +
> > > > > +/* Initialize accounting resources for staging a new inode fork btree. */
> > > > > +void
> > > > > +xrep_newbt_init_inode(
> > > > > +	struct xrep_newbt		*xnr,
> > > > > +	struct repair_ctx		*sc,
> > > > > +	int				whichfork,
> > > > > +	const struct xfs_owner_info	*oinfo)
> > > > > +{
> > > > > +	xrep_newbt_init_ag(xnr, sc, oinfo,
> > > > > +			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
> > > > > +			XFS_AG_RESV_NONE);
> > > > > +	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
> > > > > +	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * Initialize accounting resources for staging a new btree.  Callers are
> > > > > + * expected to add their own reservations (and clean them up) manually.
> > > > > + */
> > > > > +void
> > > > > +xrep_newbt_init_bare(
> > > > > +	struct xrep_newbt		*xnr,
> > > > > +	struct repair_ctx		*sc)
> > > > > +{
> > > > > +	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
> > > > > +			XFS_AG_RESV_NONE);
> > > > > +}
> > > > > +
> > > > > +/* Designate specific blocks to be used to build our new btree. */
> > > > > +int
> > > > > +xrep_newbt_add_blocks(
> > > > > +	struct xrep_newbt	*xnr,
> > > > > +	xfs_fsblock_t		fsbno,
> > > > > +	xfs_extlen_t		len)
> > > > > +{
> > > > > +	struct xrep_newbt_resv	*resv;
> > > > > +
> > > > > +	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
> > > > > +	if (!resv)
> > > > > +		return ENOMEM;
> > > > > +
> > > > > +	INIT_LIST_HEAD(&resv->list);
> > > > > +	resv->fsbno = fsbno;
> > > > > +	resv->len = len;
> > > > > +	resv->used = 0;
> > > > > +	list_add_tail(&resv->list, &xnr->resv_list);
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +/* Reserve disk space for our new btree. */
> > > > > +int
> > > > > +xrep_newbt_alloc_blocks(
> > > > > +	struct xrep_newbt	*xnr,
> > > > > +	uint64_t		nr_blocks)
> > > > > +{
> > > > > +	struct repair_ctx	*sc = xnr->sc;
> > > > > +	xfs_alloctype_t		type;
> > > > > +	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
> > > > > +	int			error = 0;
> > > > > +
> > > > > +	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
> > > > > +
> > > > > +	while (nr_blocks > 0 && !error) {
> > > > > +		struct xfs_alloc_arg	args = {
> > > > > +			.tp		= sc->tp,
> > > > > +			.mp		= sc->mp,
> > > > > +			.type		= type,
> > > > > +			.fsbno		= alloc_hint,
> > > > > +			.oinfo		= xnr->oinfo,
> > > > > +			.minlen		= 1,
> > > > > +			.maxlen		= nr_blocks,
> > > > > +			.prod		= 1,
> > > > > +			.resv		= xnr->resv,
> > > > > +		};
> > > > > +
> > > > > +		error = -libxfs_alloc_vextent(&args);
> > > > > +		if (error)
> > > > > +			return error;
> > > > > +		if (args.fsbno == NULLFSBLOCK)
> > > > > +			return ENOSPC;
> > > > > +
> > > > > +		/* We don't have real EFIs here so skip that. */
> > > > > +
> > > > > +		error = xrep_newbt_add_blocks(xnr, args.fsbno, args.len);
> > > > > +		if (error)
> > > > > +			break;
> > > > > +
> > > > > +		nr_blocks -= args.len;
> > > > > +		alloc_hint = args.fsbno + args.len - 1;
> > > > > +
> > > > > +		if (sc->ip)
> > > > > +			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> > > > > +		else
> > > > > +			error = xrep_roll_ag_trans(sc);
> > > > > +	}
> > > > > +
> > > > > +	return error;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * Release blocks that were reserved for a btree repair.  If the repair
> > > > > + * succeeded then we log deferred frees for unused blocks.  Otherwise, we try
> > > > > + * to free the extents immediately to roll the filesystem back to where it was
> > > > > + * before we started.
> > > > > + */
> > > > > +static inline int
> > > > > +xrep_newbt_destroy_reservation(
> > > > > +	struct xrep_newbt	*xnr,
> > > > > +	struct xrep_newbt_resv	*resv,
> > > > > +	bool			cancel_repair)
> > > > > +{
> > > > > +	struct repair_ctx	*sc = xnr->sc;
> > > > > +
> > > > > +	if (cancel_repair) {
> > > > > +		int		error;
> > > > > +
> > > > > +		/* Free the extent then roll the transaction. */
> > > > > +		error = -libxfs_free_extent(sc->tp, resv->fsbno, resv->len,
> > > > > +				&xnr->oinfo, xnr->resv);
> > > > > +		if (error)
> > > > > +			return error;
> > > > > +
> > > > > +		if (sc->ip)
> > > > > +			return -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> > > > > +		return xrep_roll_ag_trans(sc);
> > > > > +	}
> > > > > +
> > > > > +	/* We don't have EFIs here so skip the EFD. */
> > > > > +
> > > > > +	/*
> > > > > +	 * Use the deferred freeing mechanism to schedule for deletion any
> > > > > +	 * blocks we didn't use to rebuild the tree.  This enables us to log
> > > > > +	 * them all in the same transaction as the root change.
> > > > > +	 */
> > > > > +	resv->fsbno += resv->used;
> > > > > +	resv->len -= resv->used;
> > > > > +	resv->used = 0;
> > > > > +
> > > > > +	if (resv->len == 0)
> > > > > +		return 0;
> > > > > +
> > > > > +	trace_xrep_newbt_free_blocks(sc->mp,
> > > > > +			XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
> > > > > +			XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
> > > > > +			resv->len, xnr->oinfo.oi_owner);
> > > > > +
> > > > > +	__xfs_bmap_add_free(sc->tp, resv->fsbno, resv->len, &xnr->oinfo, true);
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +/* Free all the accounting info and disk space we reserved for a new btree. */
> > > > > +void
> > > > > +xrep_newbt_destroy(
> > > > > +	struct xrep_newbt	*xnr,
> > > > > +	int			error)
> > > > > +{
> > > > > +	struct repair_ctx	*sc = xnr->sc;
> > > > > +	struct xrep_newbt_resv	*resv, *n;
> > > > > +	int			err2;
> > > > > +
> > > > > +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> > > > > +		err2 = xrep_newbt_destroy_reservation(xnr, resv, error != 0);
> > > > > +		if (err2)
> > > > > +			goto junkit;
> > > > > +
> > > > > +		list_del(&resv->list);
> > > > > +		kmem_free(resv);
> > > > > +	}
> > > > > +
> > > > > +junkit:
> > > > > +	/*
> > > > > +	 * If we still have reservations attached to @xnr, cleanup must have
> > > > > +	 * failed and the filesystem is about to go down.  Clean up the incore
> > > > > +	 * reservations.
> > > > > +	 */
> > > > > +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> > > > > +		list_del(&resv->list);
> > > > > +		kmem_free(resv);
> > > > > +	}
> > > > > +
> > > > > +	if (sc->ip) {
> > > > > +		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
> > > > > +		xnr->ifake.if_fork = NULL;
> > > > > +	}
> > > > > +}
> > > > > +
> > > > > +/* Feed one of the reserved btree blocks to the bulk loader. */
> > > > > +int
> > > > > +xrep_newbt_claim_block(
> > > > > +	struct xfs_btree_cur	*cur,
> > > > > +	struct xrep_newbt	*xnr,
> > > > > +	union xfs_btree_ptr	*ptr)
> > > > > +{
> > > > > +	struct xrep_newbt_resv	*resv;
> > > > > +	xfs_fsblock_t		fsb;
> > > > > +
> > > > > +	/*
> > > > > +	 * The first item in the list should always have a free block unless
> > > > > +	 * we're completely out.
> > > > > +	 */
> > > > > +	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
> > > > > +	if (resv->used == resv->len)
> > > > > +		return ENOSPC;
> > > > > +
> > > > > +	/*
> > > > > +	 * Peel off a block from the start of the reservation.  We allocate
> > > > > +	 * blocks in order to place blocks on disk in increasing record or key
> > > > > +	 * order.  The block reservations tend to end up on the list in
> > > > > +	 * decreasing order, which hopefully results in leaf blocks ending up
> > > > > +	 * together.
> > > > > +	 */
> > > > > +	fsb = resv->fsbno + resv->used;
> > > > > +	resv->used++;
> > > > > +
> > > > > +	/* If we used all the blocks in this reservation, move it to the end. */
> > > > > +	if (resv->used == resv->len)
> > > > > +		list_move_tail(&resv->list, &xnr->resv_list);
> > > > > +
> > > > > +	trace_xrep_newbt_claim_block(cur->bc_mp,
> > > > > +			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
> > > > > +			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
> > > > > +			1, xnr->oinfo.oi_owner);
> > > > > +
> > > > > +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
> > > > > +		ptr->l = cpu_to_be64(fsb);
> > > > > +	else
> > > > > +		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
> > > > > +	return 0;
> > > > > +}
> > > > > diff --git a/repair/bload.h b/repair/bload.h
> > > > > new file mode 100644
> > > > > index 00000000..020c4834
> > > > > --- /dev/null
> > > > > +++ b/repair/bload.h
> > > > > @@ -0,0 +1,77 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > > +/*
> > > > > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > > > > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > + */
> > > > > +#ifndef __XFS_REPAIR_BLOAD_H__
> > > > > +#define __XFS_REPAIR_BLOAD_H__
> > > > > +
> > > > > +extern int bload_leaf_slack;
> > > > > +extern int bload_node_slack;
> > > > > +
> > > > > +struct repair_ctx {
> > > > > +	struct xfs_mount	*mp;
> > > > > +	struct xfs_inode	*ip;
> > > > > +	struct xfs_trans	*tp;
> > > > > +
> > > > > +	struct xfs_buf		*agi_bp;
> > > > > +	struct xfs_buf		*agf_bp;
> > > > > +	struct xfs_buf		*agfl_bp;
> > > > > +};
> > > > > +
> > > > > +struct xrep_newbt_resv {
> > > > > +	/* Link to list of extents that we've reserved. */
> > > > > +	struct list_head	list;
> > > > > +
> > > > > +	/* FSB of the block we reserved. */
> > > > > +	xfs_fsblock_t		fsbno;
> > > > > +
> > > > > +	/* Length of the reservation. */
> > > > > +	xfs_extlen_t		len;
> > > > > +
> > > > > +	/* How much of this reservation we've used. */
> > > > > +	xfs_extlen_t		used;
> > > > > +};
> > > > > +
> > > > > +struct xrep_newbt {
> > > > > +	struct repair_ctx	*sc;
> > > > > +
> > > > > +	/* List of extents that we've reserved. */
> > > > > +	struct list_head	resv_list;
> > > > > +
> > > > > +	/* Fake root for new btree. */
> > > > > +	union {
> > > > > +		struct xbtree_afakeroot	afake;
> > > > > +		struct xbtree_ifakeroot	ifake;
> > > > > +	};
> > > > > +
> > > > > +	/* rmap owner of these blocks */
> > > > > +	struct xfs_owner_info	oinfo;
> > > > > +
> > > > > +	/* The last reservation we allocated from. */
> > > > > +	struct xrep_newbt_resv	*last_resv;
> > > > > +
> > > > > +	/* Allocation hint */
> > > > > +	xfs_fsblock_t		alloc_hint;
> > > > > +
> > > > > +	/* per-ag reservation type */
> > > > > +	enum xfs_ag_resv_type	resv;
> > > > > +};
> > > > > +
> > > > > +#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
> > > > > +	list_for_each_entry_safe((resv), (n), &(xnr)->resv_list, list)
> > > > > +
> > > > > +void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct repair_ctx *sc);
> > > > > +void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct repair_ctx *sc,
> > > > > +		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
> > > > > +		enum xfs_ag_resv_type resv);
> > > > > +void xrep_newbt_init_inode(struct xrep_newbt *xnr, struct repair_ctx *sc,
> > > > > +		int whichfork, const struct xfs_owner_info *oinfo);
> > > > > +int xrep_newbt_add_blocks(struct xrep_newbt *xnr, xfs_fsblock_t fsbno,
> > > > > +		xfs_extlen_t len);
> > > > > +int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
> > > > > +void xrep_newbt_destroy(struct xrep_newbt *xnr, int error);
> > > > > +int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
> > > > > +		union xfs_btree_ptr *ptr);
> > > > > +
> > > > > +#endif /* __XFS_REPAIR_BLOAD_H__ */
> > > > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > > > index 9d72fa8e..8fbd3649 100644
> > > > > --- a/repair/xfs_repair.c
> > > > > +++ b/repair/xfs_repair.c
> > > > > @@ -24,6 +24,7 @@
> > > > >  #include "rmap.h"
> > > > >  #include "libfrog/fsgeom.h"
> > > > >  #include "libfrog/platform.h"
> > > > > +#include "bload.h"
> > > > >  
> > > > >  /*
> > > > >   * option tables for getsubopt calls
> > > > > @@ -39,6 +40,8 @@ enum o_opt_nums {
> > > > >  	AG_STRIDE,
> > > > >  	FORCE_GEO,
> > > > >  	PHASE2_THREADS,
> > > > > +	BLOAD_LEAF_SLACK,
> > > > > +	BLOAD_NODE_SLACK,
> > > > >  	O_MAX_OPTS,
> > > > >  };
> > > > >  
> > > > > @@ -49,6 +52,8 @@ static char *o_opts[] = {
> > > > >  	[AG_STRIDE]		= "ag_stride",
> > > > >  	[FORCE_GEO]		= "force_geometry",
> > > > >  	[PHASE2_THREADS]	= "phase2_threads",
> > > > > +	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
> > > > > +	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
> > > > >  	[O_MAX_OPTS]		= NULL,
> > > > >  };
> > > > >  
> > > > > @@ -260,6 +265,18 @@ process_args(int argc, char **argv)
> > > > >  		_("-o phase2_threads requires a parameter\n"));
> > > > >  					phase2_threads = (int)strtol(val, NULL, 0);
> > > > >  					break;
> > > > > +				case BLOAD_LEAF_SLACK:
> > > > > +					if (!val)
> > > > > +						do_abort(
> > > > > +		_("-o debug_bload_leaf_slack requires a parameter\n"));
> > > > > +					bload_leaf_slack = (int)strtol(val, NULL, 0);
> > > > > +					break;
> > > > > +				case BLOAD_NODE_SLACK:
> > > > > +					if (!val)
> > > > > +						do_abort(
> > > > > +		_("-o debug_bload_node_slack requires a parameter\n"));
> > > > > +					bload_node_slack = (int)strtol(val, NULL, 0);
> > > > > +					break;
> > > > >  				default:
> > > > >  					unknown('o', val);
> > > > >  					break;
> > > > > 
> > > > 
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-05-28 15:08       ` Brian Foster
@ 2020-05-29 21:01         ` Darrick J. Wong
  2020-06-01 12:03           ` Brian Foster
  0 siblings, 1 reply; 25+ messages in thread
From: Darrick J. Wong @ 2020-05-29 21:01 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Thu, May 28, 2020 at 11:08:36AM -0400, Brian Foster wrote:
> On Wed, May 27, 2020 at 03:34:24PM -0700, Darrick J. Wong wrote:
> > On Wed, May 27, 2020 at 08:15:31AM -0400, Brian Foster wrote:
> > > On Tue, May 19, 2020 at 06:50:49PM -0700, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > 
> > > > Port the new btree staging context and related block reservation helper
> > > > code from the kernel to repair.  We'll use this in subsequent patches to
> > > > implement btree bulk loading.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > ---
> > > >  include/libxfs.h         |    1 
> > > >  libxfs/libxfs_api_defs.h |    2 
> > > >  repair/Makefile          |    4 -
> > > >  repair/bload.c           |  303 ++++++++++++++++++++++++++++++++++++++++++++++
> > > >  repair/bload.h           |   77 ++++++++++++
> > > >  repair/xfs_repair.c      |   17 +++
> > > >  6 files changed, 402 insertions(+), 2 deletions(-)
> > > >  create mode 100644 repair/bload.c
> > > >  create mode 100644 repair/bload.h
> > > > 
> > > > 
> > > ...
> > > > diff --git a/repair/bload.c b/repair/bload.c
> > > > new file mode 100644
> > > > index 00000000..9bc17468
> > > > --- /dev/null
> > > > +++ b/repair/bload.c
> > > > @@ -0,0 +1,303 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > +/*
> > > > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > > > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > > > + */
> > > > +#include <libxfs.h>
> > > > +#include "bload.h"
> > > > +
> > > > +#define trace_xrep_newbt_claim_block(...)	((void) 0)
> > > > +#define trace_xrep_newbt_free_blocks(...)	((void) 0)
> > > > +
> > > > +int bload_leaf_slack = -1;
> > > > +int bload_node_slack = -1;
> > > > +
> > > > +/* Ported routines from fs/xfs/scrub/repair.c */
> > > > +
> > > 
> > > Looks mostly straightforward, but I'll have to come back to this as I
> > > get to the code that uses it later in the series. In the meantime, I see
> > > some of these helpers in scrub/repair.c but not others. Are there
> > > references to other routines that are intended to be copies from kernel
> > > code?
> > 
> > Hm.  I might not understand the question, but in general the code should
> > be fairly similar to the kernel functions.  The biggest differences are
> > (a) that whole libxfs error code mess, (b) the much simpler repair_ctx
> > structure, and (c) the fact that repair doesn't bother with EFIs to
> > automatically reap blocks.
> > 
> > So... the ten functions you see here do the same things as their kernel
> > counterparts, but they get to do it in the much simpler userspace
> > environment.
> > 
> 
> Right.. I was able to find the first function (xrep_roll_ag_trans())
> easily in the kernel because it has the same name. The next one or two
> (i.e., xrep_newbt_*()) I couldn't find and then gave up. Are they
> renamed? Unmerged?

Ahh, silly me, the rest are as yet unmerged components of the online
repair code.  Maybe it makes more sense to drop the "Ported routines
from XXXX" comment, particularly since it probably will be easier to
merge the xfs_repair series first, which is /much/ smaller in scope.

> > The other functions in scrub/repair.c that didn't get ported are either
> > for other types of repairs or exist to support the in-kernel code and
> > aren't needed here.
> > 
> 
> Sure, I'm just curious how to identify the source of the ones that are.

https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux.git/tree/fs/xfs/scrub/repair.c?h=djwong-wtf

Is a fairly recent snapshot of what the kernel version looks like.

--D

> 
> Brian
> 
> > --D
> > 
> > > Brian
> > > 
> > > > +/*
> > > > + * Roll a transaction, keeping the AG headers locked and reinitializing
> > > > + * the btree cursors.
> > > > + */
> > > > +int
> > > > +xrep_roll_ag_trans(
> > > > +	struct repair_ctx	*sc)
> > > > +{
> > > > +	int			error;
> > > > +
> > > > +	/* Keep the AG header buffers locked so we can keep going. */
> > > > +	if (sc->agi_bp)
> > > > +		libxfs_trans_bhold(sc->tp, sc->agi_bp);
> > > > +	if (sc->agf_bp)
> > > > +		libxfs_trans_bhold(sc->tp, sc->agf_bp);
> > > > +	if (sc->agfl_bp)
> > > > +		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
> > > > +
> > > > +	/*
> > > > +	 * Roll the transaction.  We still own the buffer and the buffer lock
> > > > +	 * regardless of whether or not the roll succeeds.  If the roll fails,
> > > > +	 * the buffers will be released during teardown on our way out of
> > > > +	 * repair.  If it succeeds, we join them to the new transaction and
> > > > +	 * move on.
> > > > +	 */
> > > > +	error = -libxfs_trans_roll(&sc->tp);
> > > > +	if (error)
> > > > +		return error;
> > > > +
> > > > +	/* Join AG headers to the new transaction. */
> > > > +	if (sc->agi_bp)
> > > > +		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
> > > > +	if (sc->agf_bp)
> > > > +		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
> > > > +	if (sc->agfl_bp)
> > > > +		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +/* Initialize accounting resources for staging a new AG btree. */
> > > > +void
> > > > +xrep_newbt_init_ag(
> > > > +	struct xrep_newbt		*xnr,
> > > > +	struct repair_ctx		*sc,
> > > > +	const struct xfs_owner_info	*oinfo,
> > > > +	xfs_fsblock_t			alloc_hint,
> > > > +	enum xfs_ag_resv_type		resv)
> > > > +{
> > > > +	memset(xnr, 0, sizeof(struct xrep_newbt));
> > > > +	xnr->sc = sc;
> > > > +	xnr->oinfo = *oinfo; /* structure copy */
> > > > +	xnr->alloc_hint = alloc_hint;
> > > > +	xnr->resv = resv;
> > > > +	INIT_LIST_HEAD(&xnr->resv_list);
> > > > +}
> > > > +
> > > > +/* Initialize accounting resources for staging a new inode fork btree. */
> > > > +void
> > > > +xrep_newbt_init_inode(
> > > > +	struct xrep_newbt		*xnr,
> > > > +	struct repair_ctx		*sc,
> > > > +	int				whichfork,
> > > > +	const struct xfs_owner_info	*oinfo)
> > > > +{
> > > > +	xrep_newbt_init_ag(xnr, sc, oinfo,
> > > > +			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
> > > > +			XFS_AG_RESV_NONE);
> > > > +	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
> > > > +	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
> > > > +}
> > > > +
> > > > +/*
> > > > + * Initialize accounting resources for staging a new btree.  Callers are
> > > > + * expected to add their own reservations (and clean them up) manually.
> > > > + */
> > > > +void
> > > > +xrep_newbt_init_bare(
> > > > +	struct xrep_newbt		*xnr,
> > > > +	struct repair_ctx		*sc)
> > > > +{
> > > > +	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
> > > > +			XFS_AG_RESV_NONE);
> > > > +}
> > > > +
> > > > +/* Designate specific blocks to be used to build our new btree. */
> > > > +int
> > > > +xrep_newbt_add_blocks(
> > > > +	struct xrep_newbt	*xnr,
> > > > +	xfs_fsblock_t		fsbno,
> > > > +	xfs_extlen_t		len)
> > > > +{
> > > > +	struct xrep_newbt_resv	*resv;
> > > > +
> > > > +	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
> > > > +	if (!resv)
> > > > +		return ENOMEM;
> > > > +
> > > > +	INIT_LIST_HEAD(&resv->list);
> > > > +	resv->fsbno = fsbno;
> > > > +	resv->len = len;
> > > > +	resv->used = 0;
> > > > +	list_add_tail(&resv->list, &xnr->resv_list);
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +/* Reserve disk space for our new btree. */
> > > > +int
> > > > +xrep_newbt_alloc_blocks(
> > > > +	struct xrep_newbt	*xnr,
> > > > +	uint64_t		nr_blocks)
> > > > +{
> > > > +	struct repair_ctx	*sc = xnr->sc;
> > > > +	xfs_alloctype_t		type;
> > > > +	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
> > > > +	int			error = 0;
> > > > +
> > > > +	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
> > > > +
> > > > +	while (nr_blocks > 0 && !error) {
> > > > +		struct xfs_alloc_arg	args = {
> > > > +			.tp		= sc->tp,
> > > > +			.mp		= sc->mp,
> > > > +			.type		= type,
> > > > +			.fsbno		= alloc_hint,
> > > > +			.oinfo		= xnr->oinfo,
> > > > +			.minlen		= 1,
> > > > +			.maxlen		= nr_blocks,
> > > > +			.prod		= 1,
> > > > +			.resv		= xnr->resv,
> > > > +		};
> > > > +
> > > > +		error = -libxfs_alloc_vextent(&args);
> > > > +		if (error)
> > > > +			return error;
> > > > +		if (args.fsbno == NULLFSBLOCK)
> > > > +			return ENOSPC;
> > > > +
> > > > +		/* We don't have real EFIs here so skip that. */
> > > > +
> > > > +		error = xrep_newbt_add_blocks(xnr, args.fsbno, args.len);
> > > > +		if (error)
> > > > +			break;
> > > > +
> > > > +		nr_blocks -= args.len;
> > > > +		alloc_hint = args.fsbno + args.len - 1;
> > > > +
> > > > +		if (sc->ip)
> > > > +			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> > > > +		else
> > > > +			error = xrep_roll_ag_trans(sc);
> > > > +	}
> > > > +
> > > > +	return error;
> > > > +}
> > > > +
> > > > +/*
> > > > + * Release blocks that were reserved for a btree repair.  If the repair
> > > > + * succeeded then we log deferred frees for unused blocks.  Otherwise, we try
> > > > + * to free the extents immediately to roll the filesystem back to where it was
> > > > + * before we started.
> > > > + */
> > > > +static inline int
> > > > +xrep_newbt_destroy_reservation(
> > > > +	struct xrep_newbt	*xnr,
> > > > +	struct xrep_newbt_resv	*resv,
> > > > +	bool			cancel_repair)
> > > > +{
> > > > +	struct repair_ctx	*sc = xnr->sc;
> > > > +
> > > > +	if (cancel_repair) {
> > > > +		int		error;
> > > > +
> > > > +		/* Free the extent then roll the transaction. */
> > > > +		error = -libxfs_free_extent(sc->tp, resv->fsbno, resv->len,
> > > > +				&xnr->oinfo, xnr->resv);
> > > > +		if (error)
> > > > +			return error;
> > > > +
> > > > +		if (sc->ip)
> > > > +			return -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> > > > +		return xrep_roll_ag_trans(sc);
> > > > +	}
> > > > +
> > > > +	/* We don't have EFIs here so skip the EFD. */
> > > > +
> > > > +	/*
> > > > +	 * Use the deferred freeing mechanism to schedule for deletion any
> > > > +	 * blocks we didn't use to rebuild the tree.  This enables us to log
> > > > +	 * them all in the same transaction as the root change.
> > > > +	 */
> > > > +	resv->fsbno += resv->used;
> > > > +	resv->len -= resv->used;
> > > > +	resv->used = 0;
> > > > +
> > > > +	if (resv->len == 0)
> > > > +		return 0;
> > > > +
> > > > +	trace_xrep_newbt_free_blocks(sc->mp,
> > > > +			XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
> > > > +			XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
> > > > +			resv->len, xnr->oinfo.oi_owner);
> > > > +
> > > > +	__xfs_bmap_add_free(sc->tp, resv->fsbno, resv->len, &xnr->oinfo, true);
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +/* Free all the accounting info and disk space we reserved for a new btree. */
> > > > +void
> > > > +xrep_newbt_destroy(
> > > > +	struct xrep_newbt	*xnr,
> > > > +	int			error)
> > > > +{
> > > > +	struct repair_ctx	*sc = xnr->sc;
> > > > +	struct xrep_newbt_resv	*resv, *n;
> > > > +	int			err2;
> > > > +
> > > > +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> > > > +		err2 = xrep_newbt_destroy_reservation(xnr, resv, error != 0);
> > > > +		if (err2)
> > > > +			goto junkit;
> > > > +
> > > > +		list_del(&resv->list);
> > > > +		kmem_free(resv);
> > > > +	}
> > > > +
> > > > +junkit:
> > > > +	/*
> > > > +	 * If we still have reservations attached to @xnr, cleanup must have
> > > > +	 * failed and the filesystem is about to go down.  Clean up the incore
> > > > +	 * reservations.
> > > > +	 */
> > > > +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> > > > +		list_del(&resv->list);
> > > > +		kmem_free(resv);
> > > > +	}
> > > > +
> > > > +	if (sc->ip) {
> > > > +		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
> > > > +		xnr->ifake.if_fork = NULL;
> > > > +	}
> > > > +}
> > > > +
> > > > +/* Feed one of the reserved btree blocks to the bulk loader. */
> > > > +int
> > > > +xrep_newbt_claim_block(
> > > > +	struct xfs_btree_cur	*cur,
> > > > +	struct xrep_newbt	*xnr,
> > > > +	union xfs_btree_ptr	*ptr)
> > > > +{
> > > > +	struct xrep_newbt_resv	*resv;
> > > > +	xfs_fsblock_t		fsb;
> > > > +
> > > > +	/*
> > > > +	 * The first item in the list should always have a free block unless
> > > > +	 * we're completely out.
> > > > +	 */
> > > > +	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
> > > > +	if (resv->used == resv->len)
> > > > +		return ENOSPC;
> > > > +
> > > > +	/*
> > > > +	 * Peel off a block from the start of the reservation.  We allocate
> > > > +	 * blocks in order to place blocks on disk in increasing record or key
> > > > +	 * order.  The block reservations tend to end up on the list in
> > > > +	 * decreasing order, which hopefully results in leaf blocks ending up
> > > > +	 * together.
> > > > +	 */
> > > > +	fsb = resv->fsbno + resv->used;
> > > > +	resv->used++;
> > > > +
> > > > +	/* If we used all the blocks in this reservation, move it to the end. */
> > > > +	if (resv->used == resv->len)
> > > > +		list_move_tail(&resv->list, &xnr->resv_list);
> > > > +
> > > > +	trace_xrep_newbt_claim_block(cur->bc_mp,
> > > > +			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
> > > > +			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
> > > > +			1, xnr->oinfo.oi_owner);
> > > > +
> > > > +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
> > > > +		ptr->l = cpu_to_be64(fsb);
> > > > +	else
> > > > +		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
> > > > +	return 0;
> > > > +}
> > > > diff --git a/repair/bload.h b/repair/bload.h
> > > > new file mode 100644
> > > > index 00000000..020c4834
> > > > --- /dev/null
> > > > +++ b/repair/bload.h
> > > > @@ -0,0 +1,77 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > +/*
> > > > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > > > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > > > + */
> > > > +#ifndef __XFS_REPAIR_BLOAD_H__
> > > > +#define __XFS_REPAIR_BLOAD_H__
> > > > +
> > > > +extern int bload_leaf_slack;
> > > > +extern int bload_node_slack;
> > > > +
> > > > +struct repair_ctx {
> > > > +	struct xfs_mount	*mp;
> > > > +	struct xfs_inode	*ip;
> > > > +	struct xfs_trans	*tp;
> > > > +
> > > > +	struct xfs_buf		*agi_bp;
> > > > +	struct xfs_buf		*agf_bp;
> > > > +	struct xfs_buf		*agfl_bp;
> > > > +};
> > > > +
> > > > +struct xrep_newbt_resv {
> > > > +	/* Link to list of extents that we've reserved. */
> > > > +	struct list_head	list;
> > > > +
> > > > +	/* FSB of the block we reserved. */
> > > > +	xfs_fsblock_t		fsbno;
> > > > +
> > > > +	/* Length of the reservation. */
> > > > +	xfs_extlen_t		len;
> > > > +
> > > > +	/* How much of this reservation we've used. */
> > > > +	xfs_extlen_t		used;
> > > > +};
> > > > +
> > > > +struct xrep_newbt {
> > > > +	struct repair_ctx	*sc;
> > > > +
> > > > +	/* List of extents that we've reserved. */
> > > > +	struct list_head	resv_list;
> > > > +
> > > > +	/* Fake root for new btree. */
> > > > +	union {
> > > > +		struct xbtree_afakeroot	afake;
> > > > +		struct xbtree_ifakeroot	ifake;
> > > > +	};
> > > > +
> > > > +	/* rmap owner of these blocks */
> > > > +	struct xfs_owner_info	oinfo;
> > > > +
> > > > +	/* The last reservation we allocated from. */
> > > > +	struct xrep_newbt_resv	*last_resv;
> > > > +
> > > > +	/* Allocation hint */
> > > > +	xfs_fsblock_t		alloc_hint;
> > > > +
> > > > +	/* per-ag reservation type */
> > > > +	enum xfs_ag_resv_type	resv;
> > > > +};
> > > > +
> > > > +#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
> > > > +	list_for_each_entry_safe((resv), (n), &(xnr)->resv_list, list)
> > > > +
> > > > +void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct repair_ctx *sc);
> > > > +void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct repair_ctx *sc,
> > > > +		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
> > > > +		enum xfs_ag_resv_type resv);
> > > > +void xrep_newbt_init_inode(struct xrep_newbt *xnr, struct repair_ctx *sc,
> > > > +		int whichfork, const struct xfs_owner_info *oinfo);
> > > > +int xrep_newbt_add_blocks(struct xrep_newbt *xnr, xfs_fsblock_t fsbno,
> > > > +		xfs_extlen_t len);
> > > > +int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
> > > > +void xrep_newbt_destroy(struct xrep_newbt *xnr, int error);
> > > > +int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
> > > > +		union xfs_btree_ptr *ptr);
> > > > +
> > > > +#endif /* __XFS_REPAIR_BLOAD_H__ */
> > > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > > index 9d72fa8e..8fbd3649 100644
> > > > --- a/repair/xfs_repair.c
> > > > +++ b/repair/xfs_repair.c
> > > > @@ -24,6 +24,7 @@
> > > >  #include "rmap.h"
> > > >  #include "libfrog/fsgeom.h"
> > > >  #include "libfrog/platform.h"
> > > > +#include "bload.h"
> > > >  
> > > >  /*
> > > >   * option tables for getsubopt calls
> > > > @@ -39,6 +40,8 @@ enum o_opt_nums {
> > > >  	AG_STRIDE,
> > > >  	FORCE_GEO,
> > > >  	PHASE2_THREADS,
> > > > +	BLOAD_LEAF_SLACK,
> > > > +	BLOAD_NODE_SLACK,
> > > >  	O_MAX_OPTS,
> > > >  };
> > > >  
> > > > @@ -49,6 +52,8 @@ static char *o_opts[] = {
> > > >  	[AG_STRIDE]		= "ag_stride",
> > > >  	[FORCE_GEO]		= "force_geometry",
> > > >  	[PHASE2_THREADS]	= "phase2_threads",
> > > > +	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
> > > > +	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
> > > >  	[O_MAX_OPTS]		= NULL,
> > > >  };
> > > >  
> > > > @@ -260,6 +265,18 @@ process_args(int argc, char **argv)
> > > >  		_("-o phase2_threads requires a parameter\n"));
> > > >  					phase2_threads = (int)strtol(val, NULL, 0);
> > > >  					break;
> > > > +				case BLOAD_LEAF_SLACK:
> > > > +					if (!val)
> > > > +						do_abort(
> > > > +		_("-o debug_bload_leaf_slack requires a parameter\n"));
> > > > +					bload_leaf_slack = (int)strtol(val, NULL, 0);
> > > > +					break;
> > > > +				case BLOAD_NODE_SLACK:
> > > > +					if (!val)
> > > > +						do_abort(
> > > > +		_("-o debug_bload_node_slack requires a parameter\n"));
> > > > +					bload_node_slack = (int)strtol(val, NULL, 0);
> > > > +					break;
> > > >  				default:
> > > >  					unknown('o', val);
> > > >  					break;
> > > > 
> > > 
> > 
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-05-27 22:34     ` Darrick J. Wong
@ 2020-05-28 15:08       ` Brian Foster
  2020-05-29 21:01         ` Darrick J. Wong
  0 siblings, 1 reply; 25+ messages in thread
From: Brian Foster @ 2020-05-28 15:08 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Wed, May 27, 2020 at 03:34:24PM -0700, Darrick J. Wong wrote:
> On Wed, May 27, 2020 at 08:15:31AM -0400, Brian Foster wrote:
> > On Tue, May 19, 2020 at 06:50:49PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Port the new btree staging context and related block reservation helper
> > > code from the kernel to repair.  We'll use this in subsequent patches to
> > > implement btree bulk loading.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > >  include/libxfs.h         |    1 
> > >  libxfs/libxfs_api_defs.h |    2 
> > >  repair/Makefile          |    4 -
> > >  repair/bload.c           |  303 ++++++++++++++++++++++++++++++++++++++++++++++
> > >  repair/bload.h           |   77 ++++++++++++
> > >  repair/xfs_repair.c      |   17 +++
> > >  6 files changed, 402 insertions(+), 2 deletions(-)
> > >  create mode 100644 repair/bload.c
> > >  create mode 100644 repair/bload.h
> > > 
> > > 
> > ...
> > > diff --git a/repair/bload.c b/repair/bload.c
> > > new file mode 100644
> > > index 00000000..9bc17468
> > > --- /dev/null
> > > +++ b/repair/bload.c
> > > @@ -0,0 +1,303 @@
> > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > +/*
> > > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > > + */
> > > +#include <libxfs.h>
> > > +#include "bload.h"
> > > +
> > > +#define trace_xrep_newbt_claim_block(...)	((void) 0)
> > > +#define trace_xrep_newbt_free_blocks(...)	((void) 0)
> > > +
> > > +int bload_leaf_slack = -1;
> > > +int bload_node_slack = -1;
> > > +
> > > +/* Ported routines from fs/xfs/scrub/repair.c */
> > > +
> > 
> > Looks mostly straightforward, but I'll have to come back to this as I
> > get to the code that uses it later in the series. In the meantime, I see
> > some of these helpers in scrub/repair.c but not others. Are there
> > references to other routines that are intended to be copies from kernel
> > code?
> 
> Hm.  I might not understand the question, but in general the code should
> be fairly similar to the kernel functions.  The biggest differences are
> (a) that whole libxfs error code mess, (b) the much simpler repair_ctx
> structure, and (c) the fact that repair doesn't bother with EFIs to
> automatically reap blocks.
> 
> So... the ten functions you see here do the same things as their kernel
> counterparts, but they get to do it in the much simpler userspace
> environment.
> 

Right.. I was able to find the first function (xrep_roll_ag_trans())
easily in the kernel because it has the same name. The next one or two
(i.e., xrep_newbt_*()) I couldn't find and then gave up. Are they
renamed? Unmerged?

> The other functions in scrub/repair.c that didn't get ported are either
> for other types of repairs or exist to support the in-kernel code and
> aren't needed here.
> 

Sure, I'm just curious how to identify the source of the ones that are.

Brian

> --D
> 
> > Brian
> > 
> > > +/*
> > > + * Roll a transaction, keeping the AG headers locked and reinitializing
> > > + * the btree cursors.
> > > + */
> > > +int
> > > +xrep_roll_ag_trans(
> > > +	struct repair_ctx	*sc)
> > > +{
> > > +	int			error;
> > > +
> > > +	/* Keep the AG header buffers locked so we can keep going. */
> > > +	if (sc->agi_bp)
> > > +		libxfs_trans_bhold(sc->tp, sc->agi_bp);
> > > +	if (sc->agf_bp)
> > > +		libxfs_trans_bhold(sc->tp, sc->agf_bp);
> > > +	if (sc->agfl_bp)
> > > +		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
> > > +
> > > +	/*
> > > +	 * Roll the transaction.  We still own the buffer and the buffer lock
> > > +	 * regardless of whether or not the roll succeeds.  If the roll fails,
> > > +	 * the buffers will be released during teardown on our way out of the
> > > +	 * the buffers will be released during teardown on our way out of
> > > +	 * repair.  If it succeeds, we join them to the new transaction and
> > > +	 */
> > > +	error = -libxfs_trans_roll(&sc->tp);
> > > +	if (error)
> > > +		return error;
> > > +
> > > +	/* Join AG headers to the new transaction. */
> > > +	if (sc->agi_bp)
> > > +		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
> > > +	if (sc->agf_bp)
> > > +		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
> > > +	if (sc->agfl_bp)
> > > +		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +/* Initialize accounting resources for staging a new AG btree. */
> > > +void
> > > +xrep_newbt_init_ag(
> > > +	struct xrep_newbt		*xnr,
> > > +	struct repair_ctx		*sc,
> > > +	const struct xfs_owner_info	*oinfo,
> > > +	xfs_fsblock_t			alloc_hint,
> > > +	enum xfs_ag_resv_type		resv)
> > > +{
> > > +	memset(xnr, 0, sizeof(struct xrep_newbt));
> > > +	xnr->sc = sc;
> > > +	xnr->oinfo = *oinfo; /* structure copy */
> > > +	xnr->alloc_hint = alloc_hint;
> > > +	xnr->resv = resv;
> > > +	INIT_LIST_HEAD(&xnr->resv_list);
> > > +}
> > > +
> > > +/* Initialize accounting resources for staging a new inode fork btree. */
> > > +void
> > > +xrep_newbt_init_inode(
> > > +	struct xrep_newbt		*xnr,
> > > +	struct repair_ctx		*sc,
> > > +	int				whichfork,
> > > +	const struct xfs_owner_info	*oinfo)
> > > +{
> > > +	xrep_newbt_init_ag(xnr, sc, oinfo,
> > > +			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
> > > +			XFS_AG_RESV_NONE);
> > > +	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
> > > +	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
> > > +}
> > > +
> > > +/*
> > > + * Initialize accounting resources for staging a new btree.  Callers are
> > > + * expected to add their own reservations (and clean them up) manually.
> > > + */
> > > +void
> > > +xrep_newbt_init_bare(
> > > +	struct xrep_newbt		*xnr,
> > > +	struct repair_ctx		*sc)
> > > +{
> > > +	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
> > > +			XFS_AG_RESV_NONE);
> > > +}
> > > +
> > > +/* Designate specific blocks to be used to build our new btree. */
> > > +int
> > > +xrep_newbt_add_blocks(
> > > +	struct xrep_newbt	*xnr,
> > > +	xfs_fsblock_t		fsbno,
> > > +	xfs_extlen_t		len)
> > > +{
> > > +	struct xrep_newbt_resv	*resv;
> > > +
> > > +	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
> > > +	if (!resv)
> > > +		return ENOMEM;
> > > +
> > > +	INIT_LIST_HEAD(&resv->list);
> > > +	resv->fsbno = fsbno;
> > > +	resv->len = len;
> > > +	resv->used = 0;
> > > +	list_add_tail(&resv->list, &xnr->resv_list);
> > > +	return 0;
> > > +}
> > > +
> > > +/* Reserve disk space for our new btree. */
> > > +int
> > > +xrep_newbt_alloc_blocks(
> > > +	struct xrep_newbt	*xnr,
> > > +	uint64_t		nr_blocks)
> > > +{
> > > +	struct repair_ctx	*sc = xnr->sc;
> > > +	xfs_alloctype_t		type;
> > > +	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
> > > +	int			error = 0;
> > > +
> > > +	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
> > > +
> > > +	while (nr_blocks > 0 && !error) {
> > > +		struct xfs_alloc_arg	args = {
> > > +			.tp		= sc->tp,
> > > +			.mp		= sc->mp,
> > > +			.type		= type,
> > > +			.fsbno		= alloc_hint,
> > > +			.oinfo		= xnr->oinfo,
> > > +			.minlen		= 1,
> > > +			.maxlen		= nr_blocks,
> > > +			.prod		= 1,
> > > +			.resv		= xnr->resv,
> > > +		};
> > > +
> > > +		error = -libxfs_alloc_vextent(&args);
> > > +		if (error)
> > > +			return error;
> > > +		if (args.fsbno == NULLFSBLOCK)
> > > +			return ENOSPC;
> > > +
> > > +		/* We don't have real EFIs here so skip that. */
> > > +
> > > +		error = xrep_newbt_add_blocks(xnr, args.fsbno, args.len);
> > > +		if (error)
> > > +			break;
> > > +
> > > +		nr_blocks -= args.len;
> > > +		alloc_hint = args.fsbno + args.len - 1;
> > > +
> > > +		if (sc->ip)
> > > +			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> > > +		else
> > > +			error = xrep_roll_ag_trans(sc);
> > > +	}
> > > +
> > > +	return error;
> > > +}
> > > +
> > > +/*
> > > + * Release blocks that were reserved for a btree repair.  If the repair
> > > + * succeeded then we log deferred frees for unused blocks.  Otherwise, we try
> > > + * to free the extents immediately to roll the filesystem back to where it was
> > > + * before we started.
> > > + */
> > > +static inline int
> > > +xrep_newbt_destroy_reservation(
> > > +	struct xrep_newbt	*xnr,
> > > +	struct xrep_newbt_resv	*resv,
> > > +	bool			cancel_repair)
> > > +{
> > > +	struct repair_ctx	*sc = xnr->sc;
> > > +
> > > +	if (cancel_repair) {
> > > +		int		error;
> > > +
> > > +		/* Free the extent then roll the transaction. */
> > > +		error = -libxfs_free_extent(sc->tp, resv->fsbno, resv->len,
> > > +				&xnr->oinfo, xnr->resv);
> > > +		if (error)
> > > +			return error;
> > > +
> > > +		if (sc->ip)
> > > +			return -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> > > +		return xrep_roll_ag_trans(sc);
> > > +	}
> > > +
> > > +	/* We don't have EFIs here so skip the EFD. */
> > > +
> > > +	/*
> > > +	 * Use the deferred freeing mechanism to schedule for deletion any
> > > +	 * blocks we didn't use to rebuild the tree.  This enables us to log
> > > +	 * them all in the same transaction as the root change.
> > > +	 */
> > > +	resv->fsbno += resv->used;
> > > +	resv->len -= resv->used;
> > > +	resv->used = 0;
> > > +
> > > +	if (resv->len == 0)
> > > +		return 0;
> > > +
> > > +	trace_xrep_newbt_free_blocks(sc->mp,
> > > +			XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
> > > +			XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
> > > +			resv->len, xnr->oinfo.oi_owner);
> > > +
> > > +	__xfs_bmap_add_free(sc->tp, resv->fsbno, resv->len, &xnr->oinfo, true);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +/* Free all the accounting info and disk space we reserved for a new btree. */
> > > +void
> > > +xrep_newbt_destroy(
> > > +	struct xrep_newbt	*xnr,
> > > +	int			error)
> > > +{
> > > +	struct repair_ctx	*sc = xnr->sc;
> > > +	struct xrep_newbt_resv	*resv, *n;
> > > +	int			err2;
> > > +
> > > +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> > > +		err2 = xrep_newbt_destroy_reservation(xnr, resv, error != 0);
> > > +		if (err2)
> > > +			goto junkit;
> > > +
> > > +		list_del(&resv->list);
> > > +		kmem_free(resv);
> > > +	}
> > > +
> > > +junkit:
> > > +	/*
> > > +	 * If we still have reservations attached to @xnr, cleanup must have
> > > +	 * failed and the filesystem is about to go down.  Clean up the incore
> > > +	 * reservations.
> > > +	 */
> > > +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> > > +		list_del(&resv->list);
> > > +		kmem_free(resv);
> > > +	}
> > > +
> > > +	if (sc->ip) {
> > > +		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
> > > +		xnr->ifake.if_fork = NULL;
> > > +	}
> > > +}
> > > +
> > > +/* Feed one of the reserved btree blocks to the bulk loader. */
> > > +int
> > > +xrep_newbt_claim_block(
> > > +	struct xfs_btree_cur	*cur,
> > > +	struct xrep_newbt	*xnr,
> > > +	union xfs_btree_ptr	*ptr)
> > > +{
> > > +	struct xrep_newbt_resv	*resv;
> > > +	xfs_fsblock_t		fsb;
> > > +
> > > +	/*
> > > +	 * The first item in the list should always have a free block unless
> > > +	 * we're completely out.
> > > +	 */
> > > +	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
> > > +	if (resv->used == resv->len)
> > > +		return ENOSPC;
> > > +
> > > +	/*
> > > +	 * Peel off a block from the start of the reservation.  We allocate
> > > +	 * blocks in order to place blocks on disk in increasing record or key
> > > +	 * order.  The block reservations tend to end up on the list in
> > > +	 * decreasing order, which hopefully results in leaf blocks ending up
> > > +	 * together.
> > > +	 */
> > > +	fsb = resv->fsbno + resv->used;
> > > +	resv->used++;
> > > +
> > > +	/* If we used all the blocks in this reservation, move it to the end. */
> > > +	if (resv->used == resv->len)
> > > +		list_move_tail(&resv->list, &xnr->resv_list);
> > > +
> > > +	trace_xrep_newbt_claim_block(cur->bc_mp,
> > > +			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
> > > +			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
> > > +			1, xnr->oinfo.oi_owner);
> > > +
> > > +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
> > > +		ptr->l = cpu_to_be64(fsb);
> > > +	else
> > > +		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
> > > +	return 0;
> > > +}
> > > diff --git a/repair/bload.h b/repair/bload.h
> > > new file mode 100644
> > > index 00000000..020c4834
> > > --- /dev/null
> > > +++ b/repair/bload.h
> > > @@ -0,0 +1,77 @@
> > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > +/*
> > > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > > + */
> > > +#ifndef __XFS_REPAIR_BLOAD_H__
> > > +#define __XFS_REPAIR_BLOAD_H__
> > > +
> > > +extern int bload_leaf_slack;
> > > +extern int bload_node_slack;
> > > +
> > > +struct repair_ctx {
> > > +	struct xfs_mount	*mp;
> > > +	struct xfs_inode	*ip;
> > > +	struct xfs_trans	*tp;
> > > +
> > > +	struct xfs_buf		*agi_bp;
> > > +	struct xfs_buf		*agf_bp;
> > > +	struct xfs_buf		*agfl_bp;
> > > +};
> > > +
> > > +struct xrep_newbt_resv {
> > > +	/* Link to list of extents that we've reserved. */
> > > +	struct list_head	list;
> > > +
> > > +	/* FSB of the block we reserved. */
> > > +	xfs_fsblock_t		fsbno;
> > > +
> > > +	/* Length of the reservation. */
> > > +	xfs_extlen_t		len;
> > > +
> > > +	/* How much of this reservation we've used. */
> > > +	xfs_extlen_t		used;
> > > +};
> > > +
> > > +struct xrep_newbt {
> > > +	struct repair_ctx	*sc;
> > > +
> > > +	/* List of extents that we've reserved. */
> > > +	struct list_head	resv_list;
> > > +
> > > +	/* Fake root for new btree. */
> > > +	union {
> > > +		struct xbtree_afakeroot	afake;
> > > +		struct xbtree_ifakeroot	ifake;
> > > +	};
> > > +
> > > +	/* rmap owner of these blocks */
> > > +	struct xfs_owner_info	oinfo;
> > > +
> > > +	/* The last reservation we allocated from. */
> > > +	struct xrep_newbt_resv	*last_resv;
> > > +
> > > +	/* Allocation hint */
> > > +	xfs_fsblock_t		alloc_hint;
> > > +
> > > +	/* per-ag reservation type */
> > > +	enum xfs_ag_resv_type	resv;
> > > +};
> > > +
> > > +#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
> > > +	list_for_each_entry_safe((resv), (n), &(xnr)->resv_list, list)
> > > +
> > > +void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct repair_ctx *sc);
> > > +void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct repair_ctx *sc,
> > > +		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
> > > +		enum xfs_ag_resv_type resv);
> > > +void xrep_newbt_init_inode(struct xrep_newbt *xnr, struct repair_ctx *sc,
> > > +		int whichfork, const struct xfs_owner_info *oinfo);
> > > +int xrep_newbt_add_blocks(struct xrep_newbt *xnr, xfs_fsblock_t fsbno,
> > > +		xfs_extlen_t len);
> > > +int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
> > > +void xrep_newbt_destroy(struct xrep_newbt *xnr, int error);
> > > +int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
> > > +		union xfs_btree_ptr *ptr);
> > > +
> > > +#endif /* __XFS_REPAIR_BLOAD_H__ */
> > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > index 9d72fa8e..8fbd3649 100644
> > > --- a/repair/xfs_repair.c
> > > +++ b/repair/xfs_repair.c
> > > @@ -24,6 +24,7 @@
> > >  #include "rmap.h"
> > >  #include "libfrog/fsgeom.h"
> > >  #include "libfrog/platform.h"
> > > +#include "bload.h"
> > >  
> > >  /*
> > >   * option tables for getsubopt calls
> > > @@ -39,6 +40,8 @@ enum o_opt_nums {
> > >  	AG_STRIDE,
> > >  	FORCE_GEO,
> > >  	PHASE2_THREADS,
> > > +	BLOAD_LEAF_SLACK,
> > > +	BLOAD_NODE_SLACK,
> > >  	O_MAX_OPTS,
> > >  };
> > >  
> > > @@ -49,6 +52,8 @@ static char *o_opts[] = {
> > >  	[AG_STRIDE]		= "ag_stride",
> > >  	[FORCE_GEO]		= "force_geometry",
> > >  	[PHASE2_THREADS]	= "phase2_threads",
> > > +	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
> > > +	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
> > >  	[O_MAX_OPTS]		= NULL,
> > >  };
> > >  
> > > @@ -260,6 +265,18 @@ process_args(int argc, char **argv)
> > >  		_("-o phase2_threads requires a parameter\n"));
> > >  					phase2_threads = (int)strtol(val, NULL, 0);
> > >  					break;
> > > +				case BLOAD_LEAF_SLACK:
> > > +					if (!val)
> > > +						do_abort(
> > > +		_("-o debug_bload_leaf_slack requires a parameter\n"));
> > > +					bload_leaf_slack = (int)strtol(val, NULL, 0);
> > > +					break;
> > > +				case BLOAD_NODE_SLACK:
> > > +					if (!val)
> > > +						do_abort(
> > > +		_("-o debug_bload_node_slack requires a parameter\n"));
> > > +					bload_node_slack = (int)strtol(val, NULL, 0);
> > > +					break;
> > >  				default:
> > >  					unknown('o', val);
> > >  					break;
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-05-27 12:15   ` Brian Foster
@ 2020-05-27 22:34     ` Darrick J. Wong
  2020-05-28 15:08       ` Brian Foster
  0 siblings, 1 reply; 25+ messages in thread
From: Darrick J. Wong @ 2020-05-27 22:34 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Wed, May 27, 2020 at 08:15:31AM -0400, Brian Foster wrote:
> On Tue, May 19, 2020 at 06:50:49PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Port the new btree staging context and related block reservation helper
> > code from the kernel to repair.  We'll use this in subsequent patches to
> > implement btree bulk loading.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  include/libxfs.h         |    1 
> >  libxfs/libxfs_api_defs.h |    2 
> >  repair/Makefile          |    4 -
> >  repair/bload.c           |  303 ++++++++++++++++++++++++++++++++++++++++++++++
> >  repair/bload.h           |   77 ++++++++++++
> >  repair/xfs_repair.c      |   17 +++
> >  6 files changed, 402 insertions(+), 2 deletions(-)
> >  create mode 100644 repair/bload.c
> >  create mode 100644 repair/bload.h
> > 
> > 
> ...
> > diff --git a/repair/bload.c b/repair/bload.c
> > new file mode 100644
> > index 00000000..9bc17468
> > --- /dev/null
> > +++ b/repair/bload.c
> > @@ -0,0 +1,303 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > + */
> > +#include <libxfs.h>
> > +#include "bload.h"
> > +
> > +#define trace_xrep_newbt_claim_block(...)	((void) 0)
> > +#define trace_xrep_newbt_free_blocks(...)	((void) 0)
> > +
> > +int bload_leaf_slack = -1;
> > +int bload_node_slack = -1;
> > +
> > +/* Ported routines from fs/xfs/scrub/repair.c */
> > +
> 
> Looks mostly straightforward, but I'll have to come back to this as I
> get to the code that uses it later in the series. In the meantime, I see
> some of these helpers in scrub/repair.c but not others. Are there
> references to other routines that are intended to be copies from kernel
> code?

Hm.  I might not understand the question, but in general the code should
be fairly similar to the kernel functions.  The biggest differences are
(a) that whole libxfs error code mess, (b) the much simpler repair_ctx
structure, and (c) the fact that repair doesn't bother with EFIs to
automatically reap blocks.

So... the ten functions you see here do the same things as their kernel
counterparts, but they get to do it in the much simpler userspace
environment.

The other functions in scrub/repair.c that didn't get ported are either
for other types of repairs or exist to support the in-kernel code and
aren't needed here.
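
Concretely, difference (a) is just a sign flip at each libxfs call
site: libxfs keeps the kernel convention of returning negative errnos,
while repair's helpers return positive ones.  A hypothetical sketch,
not code from the patch:

	/* Illustrative only; shows the errno sign convention. */
	static int
	example_roll(
		struct repair_ctx	*sc)
	{
		int			error;

		/* libxfs_trans_roll() returns 0 or a negative errno... */
		error = -libxfs_trans_roll(&sc->tp);
		if (error)
			return error;	/* ...callers get a positive errno */
		return 0;
	}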

--D

> Brian
> 
> > +/*
> > + * Roll a transaction, keeping the AG headers locked and reinitializing
> > + * the btree cursors.
> > + */
> > +int
> > +xrep_roll_ag_trans(
> > +	struct repair_ctx	*sc)
> > +{
> > +	int			error;
> > +
> > +	/* Keep the AG header buffers locked so we can keep going. */
> > +	if (sc->agi_bp)
> > +		libxfs_trans_bhold(sc->tp, sc->agi_bp);
> > +	if (sc->agf_bp)
> > +		libxfs_trans_bhold(sc->tp, sc->agf_bp);
> > +	if (sc->agfl_bp)
> > +		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
> > +
> > +	/*
> > +	 * Roll the transaction.  We still own the buffer and the buffer lock
> > +	 * regardless of whether or not the roll succeeds.  If the roll fails,
> > +	 * the buffers will be released during teardown on our way out of
> > +	 * repair.  If it succeeds, we join them to the new transaction and
> > +	 * move on.
> > +	 */
> > +	error = -libxfs_trans_roll(&sc->tp);
> > +	if (error)
> > +		return error;
> > +
> > +	/* Join AG headers to the new transaction. */
> > +	if (sc->agi_bp)
> > +		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
> > +	if (sc->agf_bp)
> > +		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
> > +	if (sc->agfl_bp)
> > +		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
> > +
> > +	return 0;
> > +}
> > +
> > +/* Initialize accounting resources for staging a new AG btree. */
> > +void
> > +xrep_newbt_init_ag(
> > +	struct xrep_newbt		*xnr,
> > +	struct repair_ctx		*sc,
> > +	const struct xfs_owner_info	*oinfo,
> > +	xfs_fsblock_t			alloc_hint,
> > +	enum xfs_ag_resv_type		resv)
> > +{
> > +	memset(xnr, 0, sizeof(struct xrep_newbt));
> > +	xnr->sc = sc;
> > +	xnr->oinfo = *oinfo; /* structure copy */
> > +	xnr->alloc_hint = alloc_hint;
> > +	xnr->resv = resv;
> > +	INIT_LIST_HEAD(&xnr->resv_list);
> > +}
> > +
> > +/* Initialize accounting resources for staging a new inode fork btree. */
> > +void
> > +xrep_newbt_init_inode(
> > +	struct xrep_newbt		*xnr,
> > +	struct repair_ctx		*sc,
> > +	int				whichfork,
> > +	const struct xfs_owner_info	*oinfo)
> > +{
> > +	xrep_newbt_init_ag(xnr, sc, oinfo,
> > +			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
> > +			XFS_AG_RESV_NONE);
> > +	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
> > +	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
> > +}
> > +
> > +/*
> > + * Initialize accounting resources for staging a new btree.  Callers are
> > + * expected to add their own reservations (and clean them up) manually.
> > + */
> > +void
> > +xrep_newbt_init_bare(
> > +	struct xrep_newbt		*xnr,
> > +	struct repair_ctx		*sc)
> > +{
> > +	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
> > +			XFS_AG_RESV_NONE);
> > +}
> > +
> > +/* Designate specific blocks to be used to build our new btree. */
> > +int
> > +xrep_newbt_add_blocks(
> > +	struct xrep_newbt	*xnr,
> > +	xfs_fsblock_t		fsbno,
> > +	xfs_extlen_t		len)
> > +{
> > +	struct xrep_newbt_resv	*resv;
> > +
> > +	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
> > +	if (!resv)
> > +		return ENOMEM;
> > +
> > +	INIT_LIST_HEAD(&resv->list);
> > +	resv->fsbno = fsbno;
> > +	resv->len = len;
> > +	resv->used = 0;
> > +	list_add_tail(&resv->list, &xnr->resv_list);
> > +	return 0;
> > +}
> > +
> > +/* Reserve disk space for our new btree. */
> > +int
> > +xrep_newbt_alloc_blocks(
> > +	struct xrep_newbt	*xnr,
> > +	uint64_t		nr_blocks)
> > +{
> > +	struct repair_ctx	*sc = xnr->sc;
> > +	xfs_alloctype_t		type;
> > +	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
> > +	int			error = 0;
> > +
> > +	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
> > +
> > +	while (nr_blocks > 0 && !error) {
> > +		struct xfs_alloc_arg	args = {
> > +			.tp		= sc->tp,
> > +			.mp		= sc->mp,
> > +			.type		= type,
> > +			.fsbno		= alloc_hint,
> > +			.oinfo		= xnr->oinfo,
> > +			.minlen		= 1,
> > +			.maxlen		= nr_blocks,
> > +			.prod		= 1,
> > +			.resv		= xnr->resv,
> > +		};
> > +
> > +		error = -libxfs_alloc_vextent(&args);
> > +		if (error)
> > +			return error;
> > +		if (args.fsbno == NULLFSBLOCK)
> > +			return ENOSPC;
> > +
> > +		/* We don't have real EFIs here so skip that. */
> > +
> > +		error = xrep_newbt_add_blocks(xnr, args.fsbno, args.len);
> > +		if (error)
> > +			break;
> > +
> > +		nr_blocks -= args.len;
> > +		alloc_hint = args.fsbno + args.len - 1;
> > +
> > +		if (sc->ip)
> > +			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> > +		else
> > +			error = xrep_roll_ag_trans(sc);
> > +	}
> > +
> > +	return error;
> > +}
> > +
> > +/*
> > + * Release blocks that were reserved for a btree repair.  If the repair
> > + * succeeded then we log deferred frees for unused blocks.  Otherwise, we try
> > + * to free the extents immediately to roll the filesystem back to where it was
> > + * before we started.
> > + */
> > +static inline int
> > +xrep_newbt_destroy_reservation(
> > +	struct xrep_newbt	*xnr,
> > +	struct xrep_newbt_resv	*resv,
> > +	bool			cancel_repair)
> > +{
> > +	struct repair_ctx	*sc = xnr->sc;
> > +
> > +	if (cancel_repair) {
> > +		int		error;
> > +
> > +		/* Free the extent then roll the transaction. */
> > +		error = -libxfs_free_extent(sc->tp, resv->fsbno, resv->len,
> > +				&xnr->oinfo, xnr->resv);
> > +		if (error)
> > +			return error;
> > +
> > +		if (sc->ip)
> > +			return -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> > +		return xrep_roll_ag_trans(sc);
> > +	}
> > +
> > +	/* We don't have EFIs here so skip the EFD. */
> > +
> > +	/*
> > +	 * Use the deferred freeing mechanism to schedule for deletion any
> > +	 * blocks we didn't use to rebuild the tree.  This enables us to log
> > +	 * them all in the same transaction as the root change.
> > +	 */
> > +	resv->fsbno += resv->used;
> > +	resv->len -= resv->used;
> > +	resv->used = 0;
> > +
> > +	if (resv->len == 0)
> > +		return 0;
> > +
> > +	trace_xrep_newbt_free_blocks(sc->mp,
> > +			XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
> > +			XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
> > +			resv->len, xnr->oinfo.oi_owner);
> > +
> > +	__xfs_bmap_add_free(sc->tp, resv->fsbno, resv->len, &xnr->oinfo, true);
> > +
> > +	return 0;
> > +}
> > +
> > +/* Free all the accounting info and disk space we reserved for a new btree. */
> > +void
> > +xrep_newbt_destroy(
> > +	struct xrep_newbt	*xnr,
> > +	int			error)
> > +{
> > +	struct repair_ctx	*sc = xnr->sc;
> > +	struct xrep_newbt_resv	*resv, *n;
> > +	int			err2;
> > +
> > +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> > +		err2 = xrep_newbt_destroy_reservation(xnr, resv, error != 0);
> > +		if (err2)
> > +			goto junkit;
> > +
> > +		list_del(&resv->list);
> > +		kmem_free(resv);
> > +	}
> > +
> > +junkit:
> > +	/*
> > +	 * If we still have reservations attached to @xnr, cleanup must have
> > +	 * failed and the filesystem is about to go down.  Clean up the incore
> > +	 * reservations.
> > +	 */
> > +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> > +		list_del(&resv->list);
> > +		kmem_free(resv);
> > +	}
> > +
> > +	if (sc->ip) {
> > +		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
> > +		xnr->ifake.if_fork = NULL;
> > +	}
> > +}
> > +
> > +/* Feed one of the reserved btree blocks to the bulk loader. */
> > +int
> > +xrep_newbt_claim_block(
> > +	struct xfs_btree_cur	*cur,
> > +	struct xrep_newbt	*xnr,
> > +	union xfs_btree_ptr	*ptr)
> > +{
> > +	struct xrep_newbt_resv	*resv;
> > +	xfs_fsblock_t		fsb;
> > +
> > +	/*
> > +	 * The first item in the list should always have a free block unless
> > +	 * we're completely out.
> > +	 */
> > +	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
> > +	if (resv->used == resv->len)
> > +		return ENOSPC;
> > +
> > +	/*
> > +	 * Peel off a block from the start of the reservation.  We allocate
> > +	 * blocks in order to place blocks on disk in increasing record or key
> > +	 * order.  The block reservations tend to end up on the list in
> > +	 * decreasing order, which hopefully results in leaf blocks ending up
> > +	 * together.
> > +	 */
> > +	fsb = resv->fsbno + resv->used;
> > +	resv->used++;
> > +
> > +	/* If we used all the blocks in this reservation, move it to the end. */
> > +	if (resv->used == resv->len)
> > +		list_move_tail(&resv->list, &xnr->resv_list);
> > +
> > +	trace_xrep_newbt_claim_block(cur->bc_mp,
> > +			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
> > +			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
> > +			1, xnr->oinfo.oi_owner);
> > +
> > +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
> > +		ptr->l = cpu_to_be64(fsb);
> > +	else
> > +		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
> > +	return 0;
> > +}
> > diff --git a/repair/bload.h b/repair/bload.h
> > new file mode 100644
> > index 00000000..020c4834
> > --- /dev/null
> > +++ b/repair/bload.h
> > @@ -0,0 +1,77 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > + */
> > +#ifndef __XFS_REPAIR_BLOAD_H__
> > +#define __XFS_REPAIR_BLOAD_H__
> > +
> > +extern int bload_leaf_slack;
> > +extern int bload_node_slack;
> > +
> > +struct repair_ctx {
> > +	struct xfs_mount	*mp;
> > +	struct xfs_inode	*ip;
> > +	struct xfs_trans	*tp;
> > +
> > +	struct xfs_buf		*agi_bp;
> > +	struct xfs_buf		*agf_bp;
> > +	struct xfs_buf		*agfl_bp;
> > +};
> > +
> > +struct xrep_newbt_resv {
> > +	/* Link to list of extents that we've reserved. */
> > +	struct list_head	list;
> > +
> > +	/* FSB of the block we reserved. */
> > +	xfs_fsblock_t		fsbno;
> > +
> > +	/* Length of the reservation. */
> > +	xfs_extlen_t		len;
> > +
> > +	/* How much of this reservation we've used. */
> > +	xfs_extlen_t		used;
> > +};
> > +
> > +struct xrep_newbt {
> > +	struct repair_ctx	*sc;
> > +
> > +	/* List of extents that we've reserved. */
> > +	struct list_head	resv_list;
> > +
> > +	/* Fake root for new btree. */
> > +	union {
> > +		struct xbtree_afakeroot	afake;
> > +		struct xbtree_ifakeroot	ifake;
> > +	};
> > +
> > +	/* rmap owner of these blocks */
> > +	struct xfs_owner_info	oinfo;
> > +
> > +	/* The last reservation we allocated from. */
> > +	struct xrep_newbt_resv	*last_resv;
> > +
> > +	/* Allocation hint */
> > +	xfs_fsblock_t		alloc_hint;
> > +
> > +	/* per-ag reservation type */
> > +	enum xfs_ag_resv_type	resv;
> > +};
> > +
> > +#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
> > +	list_for_each_entry_safe((resv), (n), &(xnr)->resv_list, list)
> > +
> > +void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct repair_ctx *sc);
> > +void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct repair_ctx *sc,
> > +		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
> > +		enum xfs_ag_resv_type resv);
> > +void xrep_newbt_init_inode(struct xrep_newbt *xnr, struct repair_ctx *sc,
> > +		int whichfork, const struct xfs_owner_info *oinfo);
> > +int xrep_newbt_add_blocks(struct xrep_newbt *xnr, xfs_fsblock_t fsbno,
> > +		xfs_extlen_t len);
> > +int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
> > +void xrep_newbt_destroy(struct xrep_newbt *xnr, int error);
> > +int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
> > +		union xfs_btree_ptr *ptr);
> > +
> > +#endif /* __XFS_REPAIR_BLOAD_H__ */
> > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > index 9d72fa8e..8fbd3649 100644
> > --- a/repair/xfs_repair.c
> > +++ b/repair/xfs_repair.c
> > @@ -24,6 +24,7 @@
> >  #include "rmap.h"
> >  #include "libfrog/fsgeom.h"
> >  #include "libfrog/platform.h"
> > +#include "bload.h"
> >  
> >  /*
> >   * option tables for getsubopt calls
> > @@ -39,6 +40,8 @@ enum o_opt_nums {
> >  	AG_STRIDE,
> >  	FORCE_GEO,
> >  	PHASE2_THREADS,
> > +	BLOAD_LEAF_SLACK,
> > +	BLOAD_NODE_SLACK,
> >  	O_MAX_OPTS,
> >  };
> >  
> > @@ -49,6 +52,8 @@ static char *o_opts[] = {
> >  	[AG_STRIDE]		= "ag_stride",
> >  	[FORCE_GEO]		= "force_geometry",
> >  	[PHASE2_THREADS]	= "phase2_threads",
> > +	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
> > +	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
> >  	[O_MAX_OPTS]		= NULL,
> >  };
> >  
> > @@ -260,6 +265,18 @@ process_args(int argc, char **argv)
> >  		_("-o phase2_threads requires a parameter\n"));
> >  					phase2_threads = (int)strtol(val, NULL, 0);
> >  					break;
> > +				case BLOAD_LEAF_SLACK:
> > +					if (!val)
> > +						do_abort(
> > +		_("-o debug_bload_leaf_slack requires a parameter\n"));
> > +					bload_leaf_slack = (int)strtol(val, NULL, 0);
> > +					break;
> > +				case BLOAD_NODE_SLACK:
> > +					if (!val)
> > +						do_abort(
> > +		_("-o debug_bload_node_slack requires a parameter\n"));
> > +					bload_node_slack = (int)strtol(val, NULL, 0);
> > +					break;
> >  				default:
> >  					unknown('o', val);
> >  					break;
> > 
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread
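
For readers following the option plumbing above: the new suboptions use the
same getsubopt syntax as ag_stride and friends, so a run that pins both
slack values might look like the following (the device name and the zero
values are illustrative only):

	# xfs_repair -o debug_bload_leaf_slack=0,debug_bload_node_slack=0 /dev/sdX1

Forcing both knobs to zero leaves no free space in the newly built btree
blocks, which is the sort of worst-case packing that the stress-testing
mentioned later in the thread relies on.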

* Re: [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-05-20  1:50 ` [PATCH 1/9] xfs_repair: port the online repair newbt structure Darrick J. Wong
@ 2020-05-27 12:15   ` Brian Foster
  2020-05-27 22:34     ` Darrick J. Wong
  0 siblings, 1 reply; 25+ messages in thread
From: Brian Foster @ 2020-05-27 12:15 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Tue, May 19, 2020 at 06:50:49PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Port the new btree staging context and related block reservation helper
> code from the kernel to repair.  We'll use this in subsequent patches to
> implement btree bulk loading.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  include/libxfs.h         |    1 
>  libxfs/libxfs_api_defs.h |    2 
>  repair/Makefile          |    4 -
>  repair/bload.c           |  303 ++++++++++++++++++++++++++++++++++++++++++++++
>  repair/bload.h           |   77 ++++++++++++
>  repair/xfs_repair.c      |   17 +++
>  6 files changed, 402 insertions(+), 2 deletions(-)
>  create mode 100644 repair/bload.c
>  create mode 100644 repair/bload.h
> 
> 
...
> diff --git a/repair/bload.c b/repair/bload.c
> new file mode 100644
> index 00000000..9bc17468
> --- /dev/null
> +++ b/repair/bload.c
> @@ -0,0 +1,303 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + */
> +#include <libxfs.h>
> +#include "bload.h"
> +
> +#define trace_xrep_newbt_claim_block(...)	((void) 0)
> +#define trace_xrep_newbt_free_blocks(...)	((void) 0)
> +
> +int bload_leaf_slack = -1;
> +int bload_node_slack = -1;
> +
> +/* Ported routines from fs/xfs/scrub/repair.c */
> +

Looks mostly straightforward, but I'll have to come back to this as I
get to the code that uses it later in the series. In the meantime, I see
some of these helpers in scrub/repair.c but not others. Are the
remaining routines also intended to be copies of kernel code?

Brian

> +/*
> + * Roll a transaction, keeping the AG headers locked and reinitializing
> + * the btree cursors.
> + */
> +int
> +xrep_roll_ag_trans(
> +	struct repair_ctx	*sc)
> +{
> +	int			error;
> +
> +	/* Keep the AG header buffers locked so we can keep going. */
> +	if (sc->agi_bp)
> +		libxfs_trans_bhold(sc->tp, sc->agi_bp);
> +	if (sc->agf_bp)
> +		libxfs_trans_bhold(sc->tp, sc->agf_bp);
> +	if (sc->agfl_bp)
> +		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
> +
> +	/*
> +	 * Roll the transaction.  We still own the buffer and the buffer lock
> +	 * regardless of whether or not the roll succeeds.  If the roll fails,
> +	 * the buffers will be released during teardown on our way out of the
> +	 * kernel.  If it succeeds, we join them to the new transaction and
> +	 * move on.
> +	 */
> +	error = -libxfs_trans_roll(&sc->tp);
> +	if (error)
> +		return error;
> +
> +	/* Join AG headers to the new transaction. */
> +	if (sc->agi_bp)
> +		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
> +	if (sc->agf_bp)
> +		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
> +	if (sc->agfl_bp)
> +		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
> +
> +	return 0;
> +}
> +
> +/* Initialize accounting resources for staging a new AG btree. */
> +void
> +xrep_newbt_init_ag(
> +	struct xrep_newbt		*xnr,
> +	struct repair_ctx		*sc,
> +	const struct xfs_owner_info	*oinfo,
> +	xfs_fsblock_t			alloc_hint,
> +	enum xfs_ag_resv_type		resv)
> +{
> +	memset(xnr, 0, sizeof(struct xrep_newbt));
> +	xnr->sc = sc;
> +	xnr->oinfo = *oinfo; /* structure copy */
> +	xnr->alloc_hint = alloc_hint;
> +	xnr->resv = resv;
> +	INIT_LIST_HEAD(&xnr->resv_list);
> +}
> +
> +/* Initialize accounting resources for staging a new inode fork btree. */
> +void
> +xrep_newbt_init_inode(
> +	struct xrep_newbt		*xnr,
> +	struct repair_ctx		*sc,
> +	int				whichfork,
> +	const struct xfs_owner_info	*oinfo)
> +{
> +	xrep_newbt_init_ag(xnr, sc, oinfo,
> +			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
> +			XFS_AG_RESV_NONE);
> +	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
> +	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
> +}
> +
> +/*
> + * Initialize accounting resources for staging a new btree.  Callers are
> + * expected to add their own reservations (and clean them up) manually.
> + */
> +void
> +xrep_newbt_init_bare(
> +	struct xrep_newbt		*xnr,
> +	struct repair_ctx		*sc)
> +{
> +	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
> +			XFS_AG_RESV_NONE);
> +}
> +
> +/* Designate specific blocks to be used to build our new btree. */
> +int
> +xrep_newbt_add_blocks(
> +	struct xrep_newbt	*xnr,
> +	xfs_fsblock_t		fsbno,
> +	xfs_extlen_t		len)
> +{
> +	struct xrep_newbt_resv	*resv;
> +
> +	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
> +	if (!resv)
> +		return ENOMEM;
> +
> +	INIT_LIST_HEAD(&resv->list);
> +	resv->fsbno = fsbno;
> +	resv->len = len;
> +	resv->used = 0;
> +	list_add_tail(&resv->list, &xnr->resv_list);
> +	return 0;
> +}
> +
> +/* Reserve disk space for our new btree. */
> +int
> +xrep_newbt_alloc_blocks(
> +	struct xrep_newbt	*xnr,
> +	uint64_t		nr_blocks)
> +{
> +	struct repair_ctx	*sc = xnr->sc;
> +	xfs_alloctype_t		type;
> +	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
> +	int			error = 0;
> +
> +	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
> +
> +	while (nr_blocks > 0 && !error) {
> +		struct xfs_alloc_arg	args = {
> +			.tp		= sc->tp,
> +			.mp		= sc->mp,
> +			.type		= type,
> +			.fsbno		= alloc_hint,
> +			.oinfo		= xnr->oinfo,
> +			.minlen		= 1,
> +			.maxlen		= nr_blocks,
> +			.prod		= 1,
> +			.resv		= xnr->resv,
> +		};
> +
> +		error = -libxfs_alloc_vextent(&args);
> +		if (error)
> +			return error;
> +		if (args.fsbno == NULLFSBLOCK)
> +			return ENOSPC;
> +
> +		/* We don't have real EFIs here so skip that. */
> +
> +		error = xrep_newbt_add_blocks(xnr, args.fsbno, args.len);
> +		if (error)
> +			break;
> +
> +		nr_blocks -= args.len;
> +		alloc_hint = args.fsbno + args.len - 1;
> +
> +		if (sc->ip)
> +			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> +		else
> +			error = xrep_roll_ag_trans(sc);
> +	}
> +
> +	return error;
> +}
> +
> +/*
> + * Release blocks that were reserved for a btree repair.  If the repair
> + * succeeded then we log deferred frees for unused blocks.  Otherwise, we try
> + * to free the extents immediately to roll the filesystem back to where it was
> + * before we started.
> + */
> +static inline int
> +xrep_newbt_destroy_reservation(
> +	struct xrep_newbt	*xnr,
> +	struct xrep_newbt_resv	*resv,
> +	bool			cancel_repair)
> +{
> +	struct repair_ctx	*sc = xnr->sc;
> +
> +	if (cancel_repair) {
> +		int		error;
> +
> +		/* Free the extent then roll the transaction. */
> +		error = -libxfs_free_extent(sc->tp, resv->fsbno, resv->len,
> +				&xnr->oinfo, xnr->resv);
> +		if (error)
> +			return error;
> +
> +		if (sc->ip)
> +			return -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> +		return xrep_roll_ag_trans(sc);
> +	}
> +
> +	/* We don't have EFIs here so skip the EFD. */
> +
> +	/*
> +	 * Use the deferred freeing mechanism to schedule for deletion any
> +	 * blocks we didn't use to rebuild the tree.  This enables us to log
> +	 * them all in the same transaction as the root change.
> +	 */
> +	resv->fsbno += resv->used;
> +	resv->len -= resv->used;
> +	resv->used = 0;
> +
> +	if (resv->len == 0)
> +		return 0;
> +
> +	trace_xrep_newbt_free_blocks(sc->mp,
> +			XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
> +			XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
> +			resv->len, xnr->oinfo.oi_owner);
> +
> +	__xfs_bmap_add_free(sc->tp, resv->fsbno, resv->len, &xnr->oinfo, true);
> +
> +	return 0;
> +}
> +
> +/* Free all the accounting info and disk space we reserved for a new btree. */
> +void
> +xrep_newbt_destroy(
> +	struct xrep_newbt	*xnr,
> +	int			error)
> +{
> +	struct repair_ctx	*sc = xnr->sc;
> +	struct xrep_newbt_resv	*resv, *n;
> +	int			err2;
> +
> +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> +		err2 = xrep_newbt_destroy_reservation(xnr, resv, error != 0);
> +		if (err2)
> +			goto junkit;
> +
> +		list_del(&resv->list);
> +		kmem_free(resv);
> +	}
> +
> +junkit:
> +	/*
> > +	 * If we still have reservations attached to @xnr, cleanup must have
> +	 * failed and the filesystem is about to go down.  Clean up the incore
> +	 * reservations.
> +	 */
> +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> +		list_del(&resv->list);
> +		kmem_free(resv);
> +	}
> +
> +	if (sc->ip) {
> +		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
> +		xnr->ifake.if_fork = NULL;
> +	}
> +}
> +
> +/* Feed one of the reserved btree blocks to the bulk loader. */
> +int
> +xrep_newbt_claim_block(
> +	struct xfs_btree_cur	*cur,
> +	struct xrep_newbt	*xnr,
> +	union xfs_btree_ptr	*ptr)
> +{
> +	struct xrep_newbt_resv	*resv;
> +	xfs_fsblock_t		fsb;
> +
> +	/*
> +	 * The first item in the list should always have a free block unless
> +	 * we're completely out.
> +	 */
> +	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
> +	if (resv->used == resv->len)
> +		return ENOSPC;
> +
> +	/*
> +	 * Peel off a block from the start of the reservation.  We allocate
> +	 * blocks in order to place blocks on disk in increasing record or key
> +	 * order.  The block reservations tend to end up on the list in
> +	 * decreasing order, which hopefully results in leaf blocks ending up
> +	 * together.
> +	 */
> +	fsb = resv->fsbno + resv->used;
> +	resv->used++;
> +
> +	/* If we used all the blocks in this reservation, move it to the end. */
> +	if (resv->used == resv->len)
> +		list_move_tail(&resv->list, &xnr->resv_list);
> +
> +	trace_xrep_newbt_claim_block(cur->bc_mp,
> +			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
> +			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
> +			1, xnr->oinfo.oi_owner);
> +
> +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
> +		ptr->l = cpu_to_be64(fsb);
> +	else
> +		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
> +	return 0;
> +}
> diff --git a/repair/bload.h b/repair/bload.h
> new file mode 100644
> index 00000000..020c4834
> --- /dev/null
> +++ b/repair/bload.h
> @@ -0,0 +1,77 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + */
> +#ifndef __XFS_REPAIR_BLOAD_H__
> +#define __XFS_REPAIR_BLOAD_H__
> +
> +extern int bload_leaf_slack;
> +extern int bload_node_slack;
> +
> +struct repair_ctx {
> +	struct xfs_mount	*mp;
> +	struct xfs_inode	*ip;
> +	struct xfs_trans	*tp;
> +
> +	struct xfs_buf		*agi_bp;
> +	struct xfs_buf		*agf_bp;
> +	struct xfs_buf		*agfl_bp;
> +};
> +
> +struct xrep_newbt_resv {
> +	/* Link to list of extents that we've reserved. */
> +	struct list_head	list;
> +
> +	/* FSB of the block we reserved. */
> +	xfs_fsblock_t		fsbno;
> +
> +	/* Length of the reservation. */
> +	xfs_extlen_t		len;
> +
> +	/* How much of this reservation we've used. */
> +	xfs_extlen_t		used;
> +};
> +
> +struct xrep_newbt {
> +	struct repair_ctx	*sc;
> +
> +	/* List of extents that we've reserved. */
> +	struct list_head	resv_list;
> +
> +	/* Fake root for new btree. */
> +	union {
> +		struct xbtree_afakeroot	afake;
> +		struct xbtree_ifakeroot	ifake;
> +	};
> +
> +	/* rmap owner of these blocks */
> +	struct xfs_owner_info	oinfo;
> +
> +	/* The last reservation we allocated from. */
> +	struct xrep_newbt_resv	*last_resv;
> +
> +	/* Allocation hint */
> +	xfs_fsblock_t		alloc_hint;
> +
> +	/* per-ag reservation type */
> +	enum xfs_ag_resv_type	resv;
> +};
> +
> +#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
> +	list_for_each_entry_safe((resv), (n), &(xnr)->resv_list, list)
> +
> +void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct repair_ctx *sc);
> +void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct repair_ctx *sc,
> +		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
> +		enum xfs_ag_resv_type resv);
> +void xrep_newbt_init_inode(struct xrep_newbt *xnr, struct repair_ctx *sc,
> +		int whichfork, const struct xfs_owner_info *oinfo);
> +int xrep_newbt_add_blocks(struct xrep_newbt *xnr, xfs_fsblock_t fsbno,
> +		xfs_extlen_t len);
> +int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
> +void xrep_newbt_destroy(struct xrep_newbt *xnr, int error);
> +int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
> +		union xfs_btree_ptr *ptr);
> +
> +#endif /* __XFS_REPAIR_BLOAD_H__ */
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 9d72fa8e..8fbd3649 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -24,6 +24,7 @@
>  #include "rmap.h"
>  #include "libfrog/fsgeom.h"
>  #include "libfrog/platform.h"
> +#include "bload.h"
>  
>  /*
>   * option tables for getsubopt calls
> @@ -39,6 +40,8 @@ enum o_opt_nums {
>  	AG_STRIDE,
>  	FORCE_GEO,
>  	PHASE2_THREADS,
> +	BLOAD_LEAF_SLACK,
> +	BLOAD_NODE_SLACK,
>  	O_MAX_OPTS,
>  };
>  
> @@ -49,6 +52,8 @@ static char *o_opts[] = {
>  	[AG_STRIDE]		= "ag_stride",
>  	[FORCE_GEO]		= "force_geometry",
>  	[PHASE2_THREADS]	= "phase2_threads",
> +	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
> +	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
>  	[O_MAX_OPTS]		= NULL,
>  };
>  
> @@ -260,6 +265,18 @@ process_args(int argc, char **argv)
>  		_("-o phase2_threads requires a parameter\n"));
>  					phase2_threads = (int)strtol(val, NULL, 0);
>  					break;
> +				case BLOAD_LEAF_SLACK:
> +					if (!val)
> +						do_abort(
> +		_("-o debug_bload_leaf_slack requires a parameter\n"));
> +					bload_leaf_slack = (int)strtol(val, NULL, 0);
> +					break;
> +				case BLOAD_NODE_SLACK:
> +					if (!val)
> +						do_abort(
> +		_("-o debug_bload_node_slack requires a parameter\n"));
> +					bload_node_slack = (int)strtol(val, NULL, 0);
> +					break;
>  				default:
>  					unknown('o', val);
>  					break;
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread
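
For context on where xrep_newbt_claim_block slots in: the btree staging
API pulled in via xfs_btree_staging.h drives block allocation through a
claim_block callback on struct xfs_btree_bload, which a later patch in
the series hooks up in phase5.c.  A minimal sketch of that glue (the
function name is invented here, and error sign conventions at the libxfs
boundary are glossed over) might look like:

	/*
	 * Hypothetical adapter between the bulk loader and the newbt
	 * reservation list; @priv is assumed to carry the xrep_newbt
	 * that was set up for this rebuild.
	 */
	static int
	example_claim_block(
		struct xfs_btree_cur	*cur,
		union xfs_btree_ptr	*ptr,
		void			*priv)
	{
		struct xrep_newbt	*xnr = priv;

		return xrep_newbt_claim_block(cur, xnr, ptr);
	}

The loader invokes the callback once for every block it formats, in
record order, which is why claim_block always peels from the head of the
reservation list.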

* [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-05-20  1:50 [PATCH v5 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
@ 2020-05-20  1:50 ` Darrick J. Wong
  2020-05-27 12:15   ` Brian Foster
  0 siblings, 1 reply; 25+ messages in thread
From: Darrick J. Wong @ 2020-05-20  1:50 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <darrick.wong@oracle.com>

Port the new btree staging context and related block reservation helper
code from the kernel to repair.  We'll use this in subsequent patches to
implement btree bulk loading.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 include/libxfs.h         |    1 
 libxfs/libxfs_api_defs.h |    2 
 repair/Makefile          |    4 -
 repair/bload.c           |  303 ++++++++++++++++++++++++++++++++++++++++++++++
 repair/bload.h           |   77 ++++++++++++
 repair/xfs_repair.c      |   17 +++
 6 files changed, 402 insertions(+), 2 deletions(-)
 create mode 100644 repair/bload.c
 create mode 100644 repair/bload.h


diff --git a/include/libxfs.h b/include/libxfs.h
index 12447835..b9370139 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -76,6 +76,7 @@ struct iomap;
 #include "xfs_rmap.h"
 #include "xfs_refcount_btree.h"
 #include "xfs_refcount.h"
+#include "xfs_btree_staging.h"
 
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index be06c763..61047f8f 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -27,12 +27,14 @@
 #define xfs_alloc_fix_freelist		libxfs_alloc_fix_freelist
 #define xfs_alloc_min_freelist		libxfs_alloc_min_freelist
 #define xfs_alloc_read_agf		libxfs_alloc_read_agf
+#define xfs_alloc_vextent		libxfs_alloc_vextent
 
 #define xfs_attr_get			libxfs_attr_get
 #define xfs_attr_leaf_newentsize	libxfs_attr_leaf_newentsize
 #define xfs_attr_namecheck		libxfs_attr_namecheck
 #define xfs_attr_set			libxfs_attr_set
 
+#define __xfs_bmap_add_free		__libxfs_bmap_add_free
 #define xfs_bmapi_read			libxfs_bmapi_read
 #define xfs_bmapi_write			libxfs_bmapi_write
 #define xfs_bmap_last_offset		libxfs_bmap_last_offset
diff --git a/repair/Makefile b/repair/Makefile
index 0964499a..8cc1ee68 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -9,11 +9,11 @@ LSRCFILES = README
 
 LTCOMMAND = xfs_repair
 
-HFILES = agheader.h attr_repair.h avl.h bmap.h btree.h \
+HFILES = agheader.h attr_repair.h avl.h bload.h bmap.h btree.h \
 	da_util.h dinode.h dir2.h err_protos.h globals.h incore.h protos.h \
 	rt.h progress.h scan.h versions.h prefetch.h rmap.h slab.h threads.h
 
-CFILES = agheader.c attr_repair.c avl.c bmap.c btree.c \
+CFILES = agheader.c attr_repair.c avl.c bload.c bmap.c btree.c \
 	da_util.c dino_chunks.c dinode.c dir2.c globals.c incore.c \
 	incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
 	phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
diff --git a/repair/bload.c b/repair/bload.c
new file mode 100644
index 00000000..9bc17468
--- /dev/null
+++ b/repair/bload.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include <libxfs.h>
+#include "bload.h"
+
+#define trace_xrep_newbt_claim_block(...)	((void) 0)
+#define trace_xrep_newbt_free_blocks(...)	((void) 0)
+
+int bload_leaf_slack = -1;
+int bload_node_slack = -1;
+
+/* Ported routines from fs/xfs/scrub/repair.c */
+
+/*
+ * Roll a transaction, keeping the AG headers locked and reinitializing
+ * the btree cursors.
+ */
+int
+xrep_roll_ag_trans(
+	struct repair_ctx	*sc)
+{
+	int			error;
+
+	/* Keep the AG header buffers locked so we can keep going. */
+	if (sc->agi_bp)
+		libxfs_trans_bhold(sc->tp, sc->agi_bp);
+	if (sc->agf_bp)
+		libxfs_trans_bhold(sc->tp, sc->agf_bp);
+	if (sc->agfl_bp)
+		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
+
+	/*
+	 * Roll the transaction.  We still own the buffer and the buffer lock
+	 * regardless of whether or not the roll succeeds.  If the roll fails,
+	 * the buffers will be released during teardown on our way out of the
+	 * kernel.  If it succeeds, we join them to the new transaction and
+	 * move on.
+	 */
+	error = -libxfs_trans_roll(&sc->tp);
+	if (error)
+		return error;
+
+	/* Join AG headers to the new transaction. */
+	if (sc->agi_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
+	if (sc->agf_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
+	if (sc->agfl_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
+
+	return 0;
+}
+
+/* Initialize accounting resources for staging a new AG btree. */
+void
+xrep_newbt_init_ag(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc,
+	const struct xfs_owner_info	*oinfo,
+	xfs_fsblock_t			alloc_hint,
+	enum xfs_ag_resv_type		resv)
+{
+	memset(xnr, 0, sizeof(struct xrep_newbt));
+	xnr->sc = sc;
+	xnr->oinfo = *oinfo; /* structure copy */
+	xnr->alloc_hint = alloc_hint;
+	xnr->resv = resv;
+	INIT_LIST_HEAD(&xnr->resv_list);
+}
+
+/* Initialize accounting resources for staging a new inode fork btree. */
+void
+xrep_newbt_init_inode(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc,
+	int				whichfork,
+	const struct xfs_owner_info	*oinfo)
+{
+	xrep_newbt_init_ag(xnr, sc, oinfo,
+			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
+			XFS_AG_RESV_NONE);
+	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
+	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
+}
+
+/*
+ * Initialize accounting resources for staging a new btree.  Callers are
+ * expected to add their own reservations (and clean them up) manually.
+ */
+void
+xrep_newbt_init_bare(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc)
+{
+	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
+			XFS_AG_RESV_NONE);
+}
+
+/* Designate specific blocks to be used to build our new btree. */
+int
+xrep_newbt_add_blocks(
+	struct xrep_newbt	*xnr,
+	xfs_fsblock_t		fsbno,
+	xfs_extlen_t		len)
+{
+	struct xrep_newbt_resv	*resv;
+
+	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
+	if (!resv)
+		return ENOMEM;
+
+	INIT_LIST_HEAD(&resv->list);
+	resv->fsbno = fsbno;
+	resv->len = len;
+	resv->used = 0;
+	list_add_tail(&resv->list, &xnr->resv_list);
+	return 0;
+}
+
+/* Reserve disk space for our new btree. */
+int
+xrep_newbt_alloc_blocks(
+	struct xrep_newbt	*xnr,
+	uint64_t		nr_blocks)
+{
+	struct repair_ctx	*sc = xnr->sc;
+	xfs_alloctype_t		type;
+	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
+	int			error = 0;
+
+	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
+
+	while (nr_blocks > 0 && !error) {
+		struct xfs_alloc_arg	args = {
+			.tp		= sc->tp,
+			.mp		= sc->mp,
+			.type		= type,
+			.fsbno		= alloc_hint,
+			.oinfo		= xnr->oinfo,
+			.minlen		= 1,
+			.maxlen		= nr_blocks,
+			.prod		= 1,
+			.resv		= xnr->resv,
+		};
+
+		error = -libxfs_alloc_vextent(&args);
+		if (error)
+			return error;
+		if (args.fsbno == NULLFSBLOCK)
+			return ENOSPC;
+
+		/* We don't have real EFIs here so skip that. */
+
+		error = xrep_newbt_add_blocks(xnr, args.fsbno, args.len);
+		if (error)
+			break;
+
+		nr_blocks -= args.len;
+		alloc_hint = args.fsbno + args.len - 1;
+
+		if (sc->ip)
+			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
+		else
+			error = xrep_roll_ag_trans(sc);
+	}
+
+	return error;
+}
+
+/*
+ * Release blocks that were reserved for a btree repair.  If the repair
+ * succeeded then we log deferred frees for unused blocks.  Otherwise, we try
+ * to free the extents immediately to roll the filesystem back to where it was
+ * before we started.
+ */
+static inline int
+xrep_newbt_destroy_reservation(
+	struct xrep_newbt	*xnr,
+	struct xrep_newbt_resv	*resv,
+	bool			cancel_repair)
+{
+	struct repair_ctx	*sc = xnr->sc;
+
+	if (cancel_repair) {
+		int		error;
+
+		/* Free the extent then roll the transaction. */
+		error = -libxfs_free_extent(sc->tp, resv->fsbno, resv->len,
+				&xnr->oinfo, xnr->resv);
+		if (error)
+			return error;
+
+		if (sc->ip)
+			return -libxfs_trans_roll_inode(&sc->tp, sc->ip);
+		return xrep_roll_ag_trans(sc);
+	}
+
+	/* We don't have EFIs here so skip the EFD. */
+
+	/*
+	 * Use the deferred freeing mechanism to schedule for deletion any
+	 * blocks we didn't use to rebuild the tree.  This enables us to log
+	 * them all in the same transaction as the root change.
+	 */
+	resv->fsbno += resv->used;
+	resv->len -= resv->used;
+	resv->used = 0;
+
+	if (resv->len == 0)
+		return 0;
+
+	trace_xrep_newbt_free_blocks(sc->mp,
+			XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
+			XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
+			resv->len, xnr->oinfo.oi_owner);
+
+	__xfs_bmap_add_free(sc->tp, resv->fsbno, resv->len, &xnr->oinfo, true);
+
+	return 0;
+}
+
+/* Free all the accounting info and disk space we reserved for a new btree. */
+void
+xrep_newbt_destroy(
+	struct xrep_newbt	*xnr,
+	int			error)
+{
+	struct repair_ctx	*sc = xnr->sc;
+	struct xrep_newbt_resv	*resv, *n;
+	int			err2;
+
+	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
+		err2 = xrep_newbt_destroy_reservation(xnr, resv, error != 0);
+		if (err2)
+			goto junkit;
+
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
+
+junkit:
+	/*
+	 * If we still have reservations attached to @xnr, cleanup must have
+	 * failed and the filesystem is about to go down.  Clean up the incore
+	 * reservations.
+	 */
+	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
+
+	if (sc->ip) {
+		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
+		xnr->ifake.if_fork = NULL;
+	}
+}
+
+/* Feed one of the reserved btree blocks to the bulk loader. */
+int
+xrep_newbt_claim_block(
+	struct xfs_btree_cur	*cur,
+	struct xrep_newbt	*xnr,
+	union xfs_btree_ptr	*ptr)
+{
+	struct xrep_newbt_resv	*resv;
+	xfs_fsblock_t		fsb;
+
+	/*
+	 * The first item in the list should always have a free block unless
+	 * we're completely out.
+	 */
+	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
+	if (resv->used == resv->len)
+		return ENOSPC;
+
+	/*
+	 * Peel off a block from the start of the reservation.  We allocate
+	 * blocks in order to place blocks on disk in increasing record or key
+	 * order.  The block reservations tend to end up on the list in
+	 * decreasing order, which hopefully results in leaf blocks ending up
+	 * together.
+	 */
+	fsb = resv->fsbno + resv->used;
+	resv->used++;
+
+	/* If we used all the blocks in this reservation, move it to the end. */
+	if (resv->used == resv->len)
+		list_move_tail(&resv->list, &xnr->resv_list);
+
+	trace_xrep_newbt_claim_block(cur->bc_mp,
+			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
+			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
+			1, xnr->oinfo.oi_owner);
+
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		ptr->l = cpu_to_be64(fsb);
+	else
+		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
+	return 0;
+}
diff --git a/repair/bload.h b/repair/bload.h
new file mode 100644
index 00000000..020c4834
--- /dev/null
+++ b/repair/bload.h
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_REPAIR_BLOAD_H__
+#define __XFS_REPAIR_BLOAD_H__
+
+extern int bload_leaf_slack;
+extern int bload_node_slack;
+
+struct repair_ctx {
+	struct xfs_mount	*mp;
+	struct xfs_inode	*ip;
+	struct xfs_trans	*tp;
+
+	struct xfs_buf		*agi_bp;
+	struct xfs_buf		*agf_bp;
+	struct xfs_buf		*agfl_bp;
+};
+
+struct xrep_newbt_resv {
+	/* Link to list of extents that we've reserved. */
+	struct list_head	list;
+
+	/* FSB of the block we reserved. */
+	xfs_fsblock_t		fsbno;
+
+	/* Length of the reservation. */
+	xfs_extlen_t		len;
+
+	/* How much of this reservation we've used. */
+	xfs_extlen_t		used;
+};
+
+struct xrep_newbt {
+	struct repair_ctx	*sc;
+
+	/* List of extents that we've reserved. */
+	struct list_head	resv_list;
+
+	/* Fake root for new btree. */
+	union {
+		struct xbtree_afakeroot	afake;
+		struct xbtree_ifakeroot	ifake;
+	};
+
+	/* rmap owner of these blocks */
+	struct xfs_owner_info	oinfo;
+
+	/* The last reservation we allocated from. */
+	struct xrep_newbt_resv	*last_resv;
+
+	/* Allocation hint */
+	xfs_fsblock_t		alloc_hint;
+
+	/* per-ag reservation type */
+	enum xfs_ag_resv_type	resv;
+};
+
+#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
+	list_for_each_entry_safe((resv), (n), &(xnr)->resv_list, list)
+
+void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct repair_ctx *sc);
+void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct repair_ctx *sc,
+		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
+		enum xfs_ag_resv_type resv);
+void xrep_newbt_init_inode(struct xrep_newbt *xnr, struct repair_ctx *sc,
+		int whichfork, const struct xfs_owner_info *oinfo);
+int xrep_newbt_add_blocks(struct xrep_newbt *xnr, xfs_fsblock_t fsbno,
+		xfs_extlen_t len);
+int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
+void xrep_newbt_destroy(struct xrep_newbt *xnr, int error);
+int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
+		union xfs_btree_ptr *ptr);
+
+#endif /* __XFS_REPAIR_BLOAD_H__ */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 9d72fa8e..8fbd3649 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -24,6 +24,7 @@
 #include "rmap.h"
 #include "libfrog/fsgeom.h"
 #include "libfrog/platform.h"
+#include "bload.h"
 
 /*
  * option tables for getsubopt calls
@@ -39,6 +40,8 @@ enum o_opt_nums {
 	AG_STRIDE,
 	FORCE_GEO,
 	PHASE2_THREADS,
+	BLOAD_LEAF_SLACK,
+	BLOAD_NODE_SLACK,
 	O_MAX_OPTS,
 };
 
@@ -49,6 +52,8 @@ static char *o_opts[] = {
 	[AG_STRIDE]		= "ag_stride",
 	[FORCE_GEO]		= "force_geometry",
 	[PHASE2_THREADS]	= "phase2_threads",
+	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
+	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
 	[O_MAX_OPTS]		= NULL,
 };
 
@@ -260,6 +265,18 @@ process_args(int argc, char **argv)
 		_("-o phase2_threads requires a parameter\n"));
 					phase2_threads = (int)strtol(val, NULL, 0);
 					break;
+				case BLOAD_LEAF_SLACK:
+					if (!val)
+						do_abort(
+		_("-o debug_bload_leaf_slack requires a parameter\n"));
+					bload_leaf_slack = (int)strtol(val, NULL, 0);
+					break;
+				case BLOAD_NODE_SLACK:
+					if (!val)
+						do_abort(
+		_("-o debug_bload_node_slack requires a parameter\n"));
+					bload_node_slack = (int)strtol(val, NULL, 0);
+					break;
 				default:
 					unknown('o', val);
 					break;


^ permalink raw reply related	[flat|nested] 25+ messages in thread
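
Taken together, the intended calling sequence for these helpers is:
initialize the xrep_newbt, ask the staging code how much space the new
tree needs, reserve exactly that much, let the bulk loader claim the
blocks, and then tear down the context.  A trimmed sketch follows,
assuming a repair_ctx @sc, a staging cursor @cur, a record count
@nr_records, an allocation @hint, per-rebuild private data @priv, and
libxfs_ wrappers for the staging entry points; the bload callbacks are
omitted and error handling is abbreviated:

	struct xrep_newbt	xnr;
	struct xfs_btree_bload	bload = {
		.leaf_slack	= bload_leaf_slack,
		.node_slack	= bload_node_slack,
		/* .get_records and .claim_block omitted for brevity */
	};
	int			error;

	xrep_newbt_init_ag(&xnr, sc, &XFS_RMAP_OINFO_AG, hint, XFS_AG_RESV_NONE);

	/* Compute the new btree geometry and required block count. */
	error = -libxfs_btree_bload_compute_geometry(cur, &bload, nr_records);

	/* Reserve exactly that much disk space... */
	if (!error)
		error = xrep_newbt_alloc_blocks(&xnr, bload.nr_blocks);

	/* ...and let the loader write out the new tree. */
	if (!error)
		error = -libxfs_btree_bload(cur, &bload, priv);

	/* Frees any over-reservation, or everything if we failed. */
	xrep_newbt_destroy(&xnr, error);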

* Re: [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-05-15 18:52         ` Darrick J. Wong
@ 2020-05-15 19:43           ` Brian Foster
  0 siblings, 0 replies; 25+ messages in thread
From: Brian Foster @ 2020-05-15 19:43 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Fri, May 15, 2020 at 11:52:39AM -0700, Darrick J. Wong wrote:
> On Fri, May 15, 2020 at 07:41:16AM -0400, Brian Foster wrote:
> > On Thu, May 14, 2020 at 12:20:37PM -0700, Darrick J. Wong wrote:
> > > On Thu, May 14, 2020 at 11:09:33AM -0400, Brian Foster wrote:
> > > > On Sat, May 09, 2020 at 09:31:47AM -0700, Darrick J. Wong wrote:
> > > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > 
> > > > > Port the new btree staging context and related block reservation helper
> > > > > code from the kernel to repair.  We'll use this in subsequent patches to
> > > > > implement btree bulk loading.
> > > > > 
> > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > ---
> > > > >  include/libxfs.h         |    1 
> > > > >  libxfs/libxfs_api_defs.h |    2 
> > > > >  repair/Makefile          |    4 -
> > > > >  repair/bload.c           |  276 ++++++++++++++++++++++++++++++++++++++++++++++
> > > > >  repair/bload.h           |   79 +++++++++++++
> > > > >  repair/xfs_repair.c      |   17 +++
> > > > >  6 files changed, 377 insertions(+), 2 deletions(-)
> > > > >  create mode 100644 repair/bload.c
> > > > >  create mode 100644 repair/bload.h
> > > > > 
> > > > > 
> > > > ...
> > > > > diff --git a/repair/bload.c b/repair/bload.c
> > > > > new file mode 100644
> > > > > index 00000000..ab05815c
> > > > > --- /dev/null
> > > > > +++ b/repair/bload.c
> > > > > @@ -0,0 +1,276 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > > +/*
> > > > > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > > > > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > + */
> > > > > +#include <libxfs.h>
> > > > > +#include "bload.h"
> > > > > +
> > > > > +#define trace_xrep_newbt_claim_block(...)	((void) 0)
> > > > > +#define trace_xrep_newbt_reserve_space(...)	((void) 0)
> > > > > +#define trace_xrep_newbt_unreserve_space(...)	((void) 0)
> > > > > +
> > > > > +int bload_leaf_slack = -1;
> > > > > +int bload_node_slack = -1;
> > > > > +
> > > > > +/* Ported routines from fs/xfs/scrub/repair.c */
> > > > > +
> > > > 
> > > > Any plans to generalize/lift more of this stuff into libxfs if it's
> > > > going to be shared with xfsprogs?
> > > 
> > > That depends on what the final online repair code looks like.
> > > I suspect it'll be different enough that it's not worth sharing, but I
> > > wouldn't be opposed to sharing identical functions.
> > > 
> > 
> > Ok, I was just going off the above note around porting existing code
> > from kernel scrub. I think it's reasonable to consider generalizations
> > later once both implementations are solidified.
> > 
> > > > ...
> > > > > +/* Free all the accounting info and disk space we reserved for a new btree. */
> > > > > +void
> > > > > +xrep_newbt_destroy(
> > > > > +	struct xrep_newbt	*xnr,
> > > > > +	int			error)
> > > > > +{
> > > > > +	struct repair_ctx	*sc = xnr->sc;
> > > > > +	struct xrep_newbt_resv	*resv, *n;
> > > > > +
> > > > > +	if (error)
> > > > > +		goto junkit;
> > > > 
> > > > Could use a comment on why we skip block freeing here..
> > > 
> > > I wonder what was the original reason for that?
> > > 
> > > IIRC if we actually error out of btree rebuilds then we've done
> > > something totally wrong while setting up the btree loader, or the
> > > storage is so broken that writes failed.  Repair is just going to call
> > > do_error() to terminate (and leave us with a broken filesystem) so we
> > > could just terminate right there at the top.
> > > 
> > 
> > Indeed.
> 
> Bah, I just realized that you and I have already reviewed a lot of this
> stuff for the kernel, and apparently I never backported that. :(
> 

Ok, I thought that stuff was actually merged so I'm kind of confused at
this point. :P

> In looking at what's in the kernel now, I realized that in general,
> the xfs_btree_bload_compute_geometry function will estimate the correct
> number of blocks to reserve for the new btree, so all this code exists
> to deal with either (a) overestimates when rebuilding the free space
> btrees; or (b) the kernel encountering a runtime error (e.g. ENOMEM) and
> needing to back out everything it's done.
> 
> For repair, (a) is still a possibility.  (b) is not, since repair will
> abort, but on the other hand it'll be easier to review a patch to unify
> the two implementations if the code stays identical.
> 
> Looking even further ahead, I plan to add two more users of the bulk
> loader: rebuilders for the bmap btrees, and (even later) the realtime
> rmapbt.  It would be helpful to keep as much of the code the same
> between repair and scrub.
> 
> So for now we don't really need the ability to free an over-reservation,
> but in the longer run it will make unification more obvious.
> 

It's also easier to review code that's already been reviewed from the
kernel and is being carted over for reuse, so I think it makes sense to
keep things in sync for that reason as well.

> /me vaguely wonders if we ought to be reviewing both of these patchsets
> in parallel....
> 

Re: above. I thought that stuff was merged and that the approach was to
move the code over for reuse between scrub and xfs_repair. In any event,
I think what would facilitate subsequent reviews is some explicit
separation between patches for shared code and repair-specific code, as
well as references in the cover letter to the source of the former if
those bits haven't landed in the kernel yet...

Brian

> > > > I'm also wondering if we can check error in the primary loop and kill
> > > > the label and duplicate loop, but I guess that depends on whether the
> > > > fields are always valid.
> > > 
> > > I think they are.
> > > 
> > > > > +
> > > > > +	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
> > > > > +		/* We don't have EFIs here so skip the EFD. */
> > > > > +
> > > > > +		/* Free every block we didn't use. */
> > > > > +		resv->fsbno += resv->used;
> > > > > +		resv->len -= resv->used;
> > > > > +		resv->used = 0;
> > > > > +
> > > > > +		if (resv->len > 0) {
> > > > > +			trace_xrep_newbt_unreserve_space(sc->mp,
> > > > > +					XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
> > > > > +					XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
> > > > > +					resv->len, xnr->oinfo.oi_owner);
> > > > > +
> > > > > +			__libxfs_bmap_add_free(sc->tp, resv->fsbno, resv->len,
> > > > > +					&xnr->oinfo, true);
> > > 
> > > TBH for repair I don't even think we need this, since in theory we
> > > reserved *exactly* the correct number of blocks for the btree.  Hmm.
> > > 
> > 
> > Ok, well it would be good to clean up whether we remove it, clean it up
> > or perhaps document why we wouldn't look at the resv fields on error if
> > there turns out to be specific reason for that.
> 
> <nod>
> 
> > > > > +		}
> > > > > +
> > > > > +		list_del(&resv->list);
> > > > > +		kmem_free(resv);
> > > > > +	}
> > > > > +
> > > > > +junkit:
> > > > > +	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
> > > > > +		list_del(&resv->list);
> > > > > +		kmem_free(resv);
> > > > > +	}
> > > > > +
> > > > > +	if (sc->ip) {
> > > > > +		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
> > > > > +		xnr->ifake.if_fork = NULL;
> > > > > +	}
> > > > > +}
> > > > > +
> > > > ...
> > > > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > > > index 9d72fa8e..8fbd3649 100644
> > > > > --- a/repair/xfs_repair.c
> > > > > +++ b/repair/xfs_repair.c
> > > > ...
> > > > > @@ -49,6 +52,8 @@ static char *o_opts[] = {
> > > > >  	[AG_STRIDE]		= "ag_stride",
> > > > >  	[FORCE_GEO]		= "force_geometry",
> > > > >  	[PHASE2_THREADS]	= "phase2_threads",
> > > > > +	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
> > > > > +	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
> > > > 
> > > > Why the "debug_" in the option names?
> > > 
> > > These are debugging knobs; there's no reason why any normal user would
> > > want to override the automatic slack sizing algorithms.  I also
> > > refrained from documenting them in the manpage. :P
> > > 
> > 
> > Oh, Ok. Perhaps that explains why they aren't in the usage() either. ;)
> 
> Yup.
> 
> --D
> 
> > Brian
> > 
> > > However, the knobs have been useful for stress-testing w/ fstests.
> > > 
> > > --D
> > > 
> > > > Brian
> > > > 
> > > > >  	[O_MAX_OPTS]		= NULL,
> > > > >  };
> > > > >  
> > > > > @@ -260,6 +265,18 @@ process_args(int argc, char **argv)
> > > > >  		_("-o phase2_threads requires a parameter\n"));
> > > > >  					phase2_threads = (int)strtol(val, NULL, 0);
> > > > >  					break;
> > > > > +				case BLOAD_LEAF_SLACK:
> > > > > +					if (!val)
> > > > > +						do_abort(
> > > > > +		_("-o debug_bload_leaf_slack requires a parameter\n"));
> > > > > +					bload_leaf_slack = (int)strtol(val, NULL, 0);
> > > > > +					break;
> > > > > +				case BLOAD_NODE_SLACK:
> > > > > +					if (!val)
> > > > > +						do_abort(
> > > > > +		_("-o debug_bload_node_slack requires a parameter\n"));
> > > > > +					bload_node_slack = (int)strtol(val, NULL, 0);
> > > > > +					break;
> > > > >  				default:
> > > > >  					unknown('o', val);
> > > > >  					break;
> > > > > 
> > > > 
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-05-15 11:41       ` Brian Foster
@ 2020-05-15 18:52         ` Darrick J. Wong
  2020-05-15 19:43           ` Brian Foster
  0 siblings, 1 reply; 25+ messages in thread
From: Darrick J. Wong @ 2020-05-15 18:52 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Fri, May 15, 2020 at 07:41:16AM -0400, Brian Foster wrote:
> On Thu, May 14, 2020 at 12:20:37PM -0700, Darrick J. Wong wrote:
> > On Thu, May 14, 2020 at 11:09:33AM -0400, Brian Foster wrote:
> > > On Sat, May 09, 2020 at 09:31:47AM -0700, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > 
> > > > Port the new btree staging context and related block reservation helper
> > > > code from the kernel to repair.  We'll use this in subsequent patches to
> > > > implement btree bulk loading.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > ---
> > > >  include/libxfs.h         |    1 
> > > >  libxfs/libxfs_api_defs.h |    2 
> > > >  repair/Makefile          |    4 -
> > > >  repair/bload.c           |  276 ++++++++++++++++++++++++++++++++++++++++++++++
> > > >  repair/bload.h           |   79 +++++++++++++
> > > >  repair/xfs_repair.c      |   17 +++
> > > >  6 files changed, 377 insertions(+), 2 deletions(-)
> > > >  create mode 100644 repair/bload.c
> > > >  create mode 100644 repair/bload.h
> > > > 
> > > > 
> > > ...
> > > > diff --git a/repair/bload.c b/repair/bload.c
> > > > new file mode 100644
> > > > index 00000000..ab05815c
> > > > --- /dev/null
> > > > +++ b/repair/bload.c
> > > > @@ -0,0 +1,276 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > +/*
> > > > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > > > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > > > + */
> > > > +#include <libxfs.h>
> > > > +#include "bload.h"
> > > > +
> > > > +#define trace_xrep_newbt_claim_block(...)	((void) 0)
> > > > +#define trace_xrep_newbt_reserve_space(...)	((void) 0)
> > > > +#define trace_xrep_newbt_unreserve_space(...)	((void) 0)
> > > > +
> > > > +int bload_leaf_slack = -1;
> > > > +int bload_node_slack = -1;
> > > > +
> > > > +/* Ported routines from fs/xfs/scrub/repair.c */
> > > > +
> > > 
> > > Any plans to generalize/lift more of this stuff into libxfs if it's
> > > going to be shared with xfsprogs?
> > 
> > That depends on what the final online repair code looks like.
> > I suspect it'll be different enough that it's not worth sharing, but I
> > wouldn't be opposed to sharing identical functions.
> > 
> 
> Ok, I was just going off the above note around porting existing code
> from kernel scrub. I think it's reasonable to consider generalizations
> later once both implementations are solidified.
> 
> > > ...
> > > > +/* Free all the accounting info and disk space we reserved for a new btree. */
> > > > +void
> > > > +xrep_newbt_destroy(
> > > > +	struct xrep_newbt	*xnr,
> > > > +	int			error)
> > > > +{
> > > > +	struct repair_ctx	*sc = xnr->sc;
> > > > +	struct xrep_newbt_resv	*resv, *n;
> > > > +
> > > > +	if (error)
> > > > +		goto junkit;
> > > 
> > > Could use a comment on why we skip block freeing here..
> > 
> > I wonder what was the original reason for that?
> > 
> > IIRC if we actually error out of btree rebuilds then we've done
> > something totally wrong while setting up the btree loader, or the
> > storage is so broken that writes failed.  Repair is just going to call
> > do_error() to terminate (and leave us with a broken filesystem) so we
> > could just terminate right there at the top.
> > 
> 
> Indeed.

Bah, I just realized that you and I have already reviewed a lot of this
stuff for the kernel, and apparently I never backported that. :(

In looking at what's in the kernel now, I realized that in general,
the xfs_btree_bload_compute_geometry function will estimate the correct
number of blocks to reserve for the new btree, so all this code exists
to deal with either (a) overestimates when rebuilding the free space
btrees; or (b) the kernel encountering a runtime error (e.g. ENOMEM) and
needing to back out everything it's done.

For repair, (a) is still a possibility.  (b) is not, since repair will
abort, but on the other hand it'll be easier to review a patch to unify
the two implementations if the code stays identical.

Looking even further ahead, I plan to add two more users of the bulk
loader: rebuilders for the bmap btrees, and (even later) the realtime
rmapbt.  It would be helpful to keep as much of the code the same
between repair and scrub.

So for now we don't really need the ability to free an over-reservation,
but in the longer run it will make unification more obvious.

/me vaguely wonders if we ought to be reviewing both of these patchsets
in parallel....

> > > I'm also wondering if we can check error in the primary loop and kill
> > > the label and duplicate loop, but I guess that depends on whether the
> > > fields are always valid.
> > 
> > I think they are.
> > 
> > > > +
> > > > +	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
> > > > +		/* We don't have EFIs here so skip the EFD. */
> > > > +
> > > > +		/* Free every block we didn't use. */
> > > > +		resv->fsbno += resv->used;
> > > > +		resv->len -= resv->used;
> > > > +		resv->used = 0;
> > > > +
> > > > +		if (resv->len > 0) {
> > > > +			trace_xrep_newbt_unreserve_space(sc->mp,
> > > > +					XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
> > > > +					XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
> > > > +					resv->len, xnr->oinfo.oi_owner);
> > > > +
> > > > +			__libxfs_bmap_add_free(sc->tp, resv->fsbno, resv->len,
> > > > +					&xnr->oinfo, true);
> > 
> > TBH for repair I don't even think we need this, since in theory we
> > reserved *exactly* the correct number of blocks for the btree.  Hmm.
> > 
> 
> Ok, well it would be good to clean up whether we remove it, clean it up
> or perhaps document why we wouldn't look at the resv fields on error if
> there turns out to be specific reason for that.

<nod>

> > > > +		}
> > > > +
> > > > +		list_del(&resv->list);
> > > > +		kmem_free(resv);
> > > > +	}
> > > > +
> > > > +junkit:
> > > > +	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
> > > > +		list_del(&resv->list);
> > > > +		kmem_free(resv);
> > > > +	}
> > > > +
> > > > +	if (sc->ip) {
> > > > +		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
> > > > +		xnr->ifake.if_fork = NULL;
> > > > +	}
> > > > +}
> > > > +
> > > ...
> > > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > > index 9d72fa8e..8fbd3649 100644
> > > > --- a/repair/xfs_repair.c
> > > > +++ b/repair/xfs_repair.c
> > > ...
> > > > @@ -49,6 +52,8 @@ static char *o_opts[] = {
> > > >  	[AG_STRIDE]		= "ag_stride",
> > > >  	[FORCE_GEO]		= "force_geometry",
> > > >  	[PHASE2_THREADS]	= "phase2_threads",
> > > > +	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
> > > > +	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
> > > 
> > > Why the "debug_" in the option names?
> > 
> > These are debugging knobs; there's no reason why any normal user would
> > want to override the automatic slack sizing algorithms.  I also
> > refrained from documenting them in the manpage. :P
> > 
> 
> Oh, Ok. Perhaps that explains why they aren't in the usage() either. ;)

Yup.

--D

> Brian
> 
> > However, the knobs have been useful for stress-testing w/ fstests.
> > 
> > --D
> > 
> > > Brian
> > > 
> > > >  	[O_MAX_OPTS]		= NULL,
> > > >  };
> > > >  
> > > > @@ -260,6 +265,18 @@ process_args(int argc, char **argv)
> > > >  		_("-o phase2_threads requires a parameter\n"));
> > > >  					phase2_threads = (int)strtol(val, NULL, 0);
> > > >  					break;
> > > > +				case BLOAD_LEAF_SLACK:
> > > > +					if (!val)
> > > > +						do_abort(
> > > > +		_("-o debug_bload_leaf_slack requires a parameter\n"));
> > > > +					bload_leaf_slack = (int)strtol(val, NULL, 0);
> > > > +					break;
> > > > +				case BLOAD_NODE_SLACK:
> > > > +					if (!val)
> > > > +						do_abort(
> > > > +		_("-o debug_bload_node_slack requires a parameter\n"));
> > > > +					bload_node_slack = (int)strtol(val, NULL, 0);
> > > > +					break;
> > > >  				default:
> > > >  					unknown('o', val);
> > > >  					break;
> > > > 
> > > 
> > 
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread
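
To make the over-reservation trim discussed above concrete (the numbers
are invented for illustration):

	/* before destroy: */  { .fsbno = 100, .len = 8, .used = 5 }
	/* after the trim: */  { .fsbno = 105, .len = 3, .used = 0 }

Only the three leftover blocks are queued for deferred freeing, so they
can be logged in the same transaction as the eventual root change.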

* Re: [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-05-14 19:20     ` Darrick J. Wong
@ 2020-05-15 11:41       ` Brian Foster
  2020-05-15 18:52         ` Darrick J. Wong
  0 siblings, 1 reply; 25+ messages in thread
From: Brian Foster @ 2020-05-15 11:41 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Thu, May 14, 2020 at 12:20:37PM -0700, Darrick J. Wong wrote:
> On Thu, May 14, 2020 at 11:09:33AM -0400, Brian Foster wrote:
> > On Sat, May 09, 2020 at 09:31:47AM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Port the new btree staging context and related block reservation helper
> > > code from the kernel to repair.  We'll use this in subsequent patches to
> > > implement btree bulk loading.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > >  include/libxfs.h         |    1 
> > >  libxfs/libxfs_api_defs.h |    2 
> > >  repair/Makefile          |    4 -
> > >  repair/bload.c           |  276 ++++++++++++++++++++++++++++++++++++++++++++++
> > >  repair/bload.h           |   79 +++++++++++++
> > >  repair/xfs_repair.c      |   17 +++
> > >  6 files changed, 377 insertions(+), 2 deletions(-)
> > >  create mode 100644 repair/bload.c
> > >  create mode 100644 repair/bload.h
> > > 
> > > 
> > ...
> > > diff --git a/repair/bload.c b/repair/bload.c
> > > new file mode 100644
> > > index 00000000..ab05815c
> > > --- /dev/null
> > > +++ b/repair/bload.c
> > > @@ -0,0 +1,276 @@
> > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > +/*
> > > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > > + */
> > > +#include <libxfs.h>
> > > +#include "bload.h"
> > > +
> > > +#define trace_xrep_newbt_claim_block(...)	((void) 0)
> > > +#define trace_xrep_newbt_reserve_space(...)	((void) 0)
> > > +#define trace_xrep_newbt_unreserve_space(...)	((void) 0)
> > > +
> > > +int bload_leaf_slack = -1;
> > > +int bload_node_slack = -1;
> > > +
> > > +/* Ported routines from fs/xfs/scrub/repair.c */
> > > +
> > 
> > Any plans to generalize/lift more of this stuff into libxfs if it's
> > going to be shared with xfsprogs?
> 
> That depends on what the final online repair code looks like.
> I suspect it'll be different enough that it's not worth sharing, but I
> wouldn't be opposed to sharing identical functions.
> 

Ok, I was just going off the above note around porting existing code
from kernel scrub. I think it's reasonable to consider generalizations
later once both implementations are solidified.

> > ...
> > > +/* Free all the accounting info and disk space we reserved for a new btree. */
> > > +void
> > > +xrep_newbt_destroy(
> > > +	struct xrep_newbt	*xnr,
> > > +	int			error)
> > > +{
> > > +	struct repair_ctx	*sc = xnr->sc;
> > > +	struct xrep_newbt_resv	*resv, *n;
> > > +
> > > +	if (error)
> > > +		goto junkit;
> > 
> > Could use a comment on why we skip block freeing here..
> 
> I wonder what was the original reason for that?
> 
> IIRC if we actually error out of btree rebuilds then we've done
> something totally wrong while setting up the btree loader, or the
> storage is so broken that writes failed.  Repair is just going to call
> do_error() to terminate (and leave us with a broken filesystem) so we
> could just terminate right there at the top.
> 

Indeed.

> > I'm also wondering if we can check error in the primary loop and kill
> > the label and duplicate loop, but I guess that depends on whether the
> > fields are always valid.
> 
> I think they are.
> 
> > > +
> > > +	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
> > > +		/* We don't have EFIs here so skip the EFD. */
> > > +
> > > +		/* Free every block we didn't use. */
> > > +		resv->fsbno += resv->used;
> > > +		resv->len -= resv->used;
> > > +		resv->used = 0;
> > > +
> > > +		if (resv->len > 0) {
> > > +			trace_xrep_newbt_unreserve_space(sc->mp,
> > > +					XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
> > > +					XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
> > > +					resv->len, xnr->oinfo.oi_owner);
> > > +
> > > +			__libxfs_bmap_add_free(sc->tp, resv->fsbno, resv->len,
> > > +					&xnr->oinfo, true);
> 
> TBH for repair I don't even think we need this, since in theory we
> reserved *exactly* the correct number of blocks for the btree.  Hmm.
> 

Ok, well it would be good to clean up whether we remove it, clean it up
or perhaps document why we wouldn't look at the resv fields on error if
there turns out to be specific reason for that.

> > > +		}
> > > +
> > > +		list_del(&resv->list);
> > > +		kmem_free(resv);
> > > +	}
> > > +
> > > +junkit:
> > > +	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
> > > +		list_del(&resv->list);
> > > +		kmem_free(resv);
> > > +	}
> > > +
> > > +	if (sc->ip) {
> > > +		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
> > > +		xnr->ifake.if_fork = NULL;
> > > +	}
> > > +}
> > > +
> > ...
> > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > index 9d72fa8e..8fbd3649 100644
> > > --- a/repair/xfs_repair.c
> > > +++ b/repair/xfs_repair.c
> > ...
> > > @@ -49,6 +52,8 @@ static char *o_opts[] = {
> > >  	[AG_STRIDE]		= "ag_stride",
> > >  	[FORCE_GEO]		= "force_geometry",
> > >  	[PHASE2_THREADS]	= "phase2_threads",
> > > +	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
> > > +	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
> > 
> > Why the "debug_" in the option names?
> 
> These are debugging knobs; there's no reason why any normal user would
> want to override the automatic slack sizing algorithms.  I also
> refrained from documenting them in the manpage. :P
> 

Oh, Ok. Perhaps that explains why they aren't in the usage() either. ;)

Brian

> However, the knobs have been useful for stress-testing w/ fstests.
> 
> --D
> 
> > Brian
> > 
> > >  	[O_MAX_OPTS]		= NULL,
> > >  };
> > >  
> > > @@ -260,6 +265,18 @@ process_args(int argc, char **argv)
> > >  		_("-o phase2_threads requires a parameter\n"));
> > >  					phase2_threads = (int)strtol(val, NULL, 0);
> > >  					break;
> > > +				case BLOAD_LEAF_SLACK:
> > > +					if (!val)
> > > +						do_abort(
> > > +		_("-o debug_bload_leaf_slack requires a parameter\n"));
> > > +					bload_leaf_slack = (int)strtol(val, NULL, 0);
> > > +					break;
> > > +				case BLOAD_NODE_SLACK:
> > > +					if (!val)
> > > +						do_abort(
> > > +		_("-o debug_bload_node_slack requires a parameter\n"));
> > > +					bload_node_slack = (int)strtol(val, NULL, 0);
> > > +					break;
> > >  				default:
> > >  					unknown('o', val);
> > >  					break;
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-05-14 15:09   ` Brian Foster
@ 2020-05-14 19:20     ` Darrick J. Wong
  2020-05-15 11:41       ` Brian Foster
  0 siblings, 1 reply; 25+ messages in thread
From: Darrick J. Wong @ 2020-05-14 19:20 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Thu, May 14, 2020 at 11:09:33AM -0400, Brian Foster wrote:
> On Sat, May 09, 2020 at 09:31:47AM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Port the new btree staging context and related block reservation helper
> > code from the kernel to repair.  We'll use this in subsequent patches to
> > implement btree bulk loading.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  include/libxfs.h         |    1 
> >  libxfs/libxfs_api_defs.h |    2 
> >  repair/Makefile          |    4 -
> >  repair/bload.c           |  276 ++++++++++++++++++++++++++++++++++++++++++++++
> >  repair/bload.h           |   79 +++++++++++++
> >  repair/xfs_repair.c      |   17 +++
> >  6 files changed, 377 insertions(+), 2 deletions(-)
> >  create mode 100644 repair/bload.c
> >  create mode 100644 repair/bload.h
> > 
> > 
> ...
> > diff --git a/repair/bload.c b/repair/bload.c
> > new file mode 100644
> > index 00000000..ab05815c
> > --- /dev/null
> > +++ b/repair/bload.c
> > @@ -0,0 +1,276 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > + */
> > +#include <libxfs.h>
> > +#include "bload.h"
> > +
> > +#define trace_xrep_newbt_claim_block(...)	((void) 0)
> > +#define trace_xrep_newbt_reserve_space(...)	((void) 0)
> > +#define trace_xrep_newbt_unreserve_space(...)	((void) 0)
> > +
> > +int bload_leaf_slack = -1;
> > +int bload_node_slack = -1;
> > +
> > +/* Ported routines from fs/xfs/scrub/repair.c */
> > +
> 
> Any plans to generalize/lift more of this stuff into libxfs if it's
> going to be shared with xfsprogs?

That depends on what the final online repair code looks like.
I suspect it'll be different enough that it's not worth sharing, but I
wouldn't be opposed to sharing identical functions.

> ...
> > +/* Free all the accounting info and disk space we reserved for a new btree. */
> > +void
> > +xrep_newbt_destroy(
> > +	struct xrep_newbt	*xnr,
> > +	int			error)
> > +{
> > +	struct repair_ctx	*sc = xnr->sc;
> > +	struct xrep_newbt_resv	*resv, *n;
> > +
> > +	if (error)
> > +		goto junkit;
> 
> Could use a comment on why we skip block freeing here..

I wonder what the original reason for that was?

IIRC if we actually error out of btree rebuilds then we've done
something totally wrong while setting up the btree loader, or the
storage is so broken that writes failed.  Repair is just going to call
do_error() to terminate (and leave us with a broken filesystem) so we
could just terminate right there at the top.
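
That is, something like this at the top would suffice (sketch only,
untested; do_error() does not return):

	if (error)
		do_error(
	_("could not clean up failed btree rebuild, err %d\n"), error);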

> I'm also wondering if we can check error in the primary loop and kill
> the label and duplicate loop, but I guess that depends on whether the
> fields are always valid.

I think they are.
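
If so, the two loops could fold into one, roughly like this (sketch
only, untested; trace point elided):

	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
		if (!error) {
			/* Free every block we didn't use. */
			resv->fsbno += resv->used;
			resv->len -= resv->used;
			resv->used = 0;

			if (resv->len > 0)
				__libxfs_bmap_add_free(sc->tp, resv->fsbno,
						resv->len, &xnr->oinfo, true);
		}

		list_del(&resv->list);
		kmem_free(resv);
	}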

> > +
> > +	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
> > +		/* We don't have EFIs here so skip the EFD. */
> > +
> > +		/* Free every block we didn't use. */
> > +		resv->fsbno += resv->used;
> > +		resv->len -= resv->used;
> > +		resv->used = 0;
> > +
> > +		if (resv->len > 0) {
> > +			trace_xrep_newbt_unreserve_space(sc->mp,
> > +					XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
> > +					XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
> > +					resv->len, xnr->oinfo.oi_owner);
> > +
> > +			__libxfs_bmap_add_free(sc->tp, resv->fsbno, resv->len,
> > +					&xnr->oinfo, true);

TBH for repair I don't even think we need this, since in theory we
reserved *exactly* the correct number of blocks for the btree.  Hmm.
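
In which case the unused-block handling could arguably shrink to an
assertion (sketch only):

	ASSERT(resv->used == resv->len);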

> > +		}
> > +
> > +		list_del(&resv->list);
> > +		kmem_free(resv);
> > +	}
> > +
> > +junkit:
> > +	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
> > +		list_del(&resv->list);
> > +		kmem_free(resv);
> > +	}
> > +
> > +	if (sc->ip) {
> > +		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
> > +		xnr->ifake.if_fork = NULL;
> > +	}
> > +}
> > +
> ...
> > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > index 9d72fa8e..8fbd3649 100644
> > --- a/repair/xfs_repair.c
> > +++ b/repair/xfs_repair.c
> ...
> > @@ -49,6 +52,8 @@ static char *o_opts[] = {
> >  	[AG_STRIDE]		= "ag_stride",
> >  	[FORCE_GEO]		= "force_geometry",
> >  	[PHASE2_THREADS]	= "phase2_threads",
> > +	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
> > +	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
> 
> Why the "debug_" in the option names?

These are debugging knobs; there's no reason why any normal user would
want to override the automatic slack sizing algorithms.  I also
refrained from documenting them in the manpage. :P

However, the knobs have been useful for stress-testing w/ fstests.
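
For example, a run like this (the device path is just a placeholder)
pins both slack values to zero to stress the fixed-slack code paths:

# xfs_repair -o debug_bload_leaf_slack=0,debug_bload_node_slack=0 /dev/sdX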

--D

> Brian
> 
> >  	[O_MAX_OPTS]		= NULL,
> >  };
> >  
> > @@ -260,6 +265,18 @@ process_args(int argc, char **argv)
> >  		_("-o phase2_threads requires a parameter\n"));
> >  					phase2_threads = (int)strtol(val, NULL, 0);
> >  					break;
> > +				case BLOAD_LEAF_SLACK:
> > +					if (!val)
> > +						do_abort(
> > +		_("-o debug_bload_leaf_slack requires a parameter\n"));
> > +					bload_leaf_slack = (int)strtol(val, NULL, 0);
> > +					break;
> > +				case BLOAD_NODE_SLACK:
> > +					if (!val)
> > +						do_abort(
> > +		_("-o debug_bload_node_slack requires a parameter\n"));
> > +					bload_node_slack = (int)strtol(val, NULL, 0);
> > +					break;
> >  				default:
> >  					unknown('o', val);
> >  					break;
> > 
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-05-09 16:31 ` [PATCH 1/9] xfs_repair: port the online repair newbt structure Darrick J. Wong
@ 2020-05-14 15:09   ` Brian Foster
  2020-05-14 19:20     ` Darrick J. Wong
  0 siblings, 1 reply; 25+ messages in thread
From: Brian Foster @ 2020-05-14 15:09 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Sat, May 09, 2020 at 09:31:47AM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Port the new btree staging context and related block reservation helper
> code from the kernel to repair.  We'll use this in subsequent patches to
> implement btree bulk loading.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  include/libxfs.h         |    1 
>  libxfs/libxfs_api_defs.h |    2 
>  repair/Makefile          |    4 -
>  repair/bload.c           |  276 ++++++++++++++++++++++++++++++++++++++++++++++
>  repair/bload.h           |   79 +++++++++++++
>  repair/xfs_repair.c      |   17 +++
>  6 files changed, 377 insertions(+), 2 deletions(-)
>  create mode 100644 repair/bload.c
>  create mode 100644 repair/bload.h
> 
> 
...
> diff --git a/repair/bload.c b/repair/bload.c
> new file mode 100644
> index 00000000..ab05815c
> --- /dev/null
> +++ b/repair/bload.c
> @@ -0,0 +1,276 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + */
> +#include <libxfs.h>
> +#include "bload.h"
> +
> +#define trace_xrep_newbt_claim_block(...)	((void) 0)
> +#define trace_xrep_newbt_reserve_space(...)	((void) 0)
> +#define trace_xrep_newbt_unreserve_space(...)	((void) 0)
> +
> +int bload_leaf_slack = -1;
> +int bload_node_slack = -1;
> +
> +/* Ported routines from fs/xfs/scrub/repair.c */
> +

Any plans to generalize/lift more of this stuff into libxfs if it's
going to be shared with xfsprogs?

...
> +/* Free all the accounting info and disk space we reserved for a new btree. */
> +void
> +xrep_newbt_destroy(
> +	struct xrep_newbt	*xnr,
> +	int			error)
> +{
> +	struct repair_ctx	*sc = xnr->sc;
> +	struct xrep_newbt_resv	*resv, *n;
> +
> +	if (error)
> +		goto junkit;

Could use a comment on why we skip block freeing here..

I'm also wondering if we can check error in the primary loop and kill
the label and duplicate loop, but I guess that depends on whether the
fields are always valid.

> +
> +	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
> +		/* We don't have EFIs here so skip the EFD. */
> +
> +		/* Free every block we didn't use. */
> +		resv->fsbno += resv->used;
> +		resv->len -= resv->used;
> +		resv->used = 0;
> +
> +		if (resv->len > 0) {
> +			trace_xrep_newbt_unreserve_space(sc->mp,
> +					XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
> +					XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
> +					resv->len, xnr->oinfo.oi_owner);
> +
> +			__libxfs_bmap_add_free(sc->tp, resv->fsbno, resv->len,
> +					&xnr->oinfo, true);
> +		}
> +
> +		list_del(&resv->list);
> +		kmem_free(resv);
> +	}
> +
> +junkit:
> +	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
> +		list_del(&resv->list);
> +		kmem_free(resv);
> +	}
> +
> +	if (sc->ip) {
> +		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
> +		xnr->ifake.if_fork = NULL;
> +	}
> +}
> +
...
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 9d72fa8e..8fbd3649 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
...
> @@ -49,6 +52,8 @@ static char *o_opts[] = {
>  	[AG_STRIDE]		= "ag_stride",
>  	[FORCE_GEO]		= "force_geometry",
>  	[PHASE2_THREADS]	= "phase2_threads",
> +	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
> +	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",

Why the "debug_" in the option names?

Brian

>  	[O_MAX_OPTS]		= NULL,
>  };
>  
> @@ -260,6 +265,18 @@ process_args(int argc, char **argv)
>  		_("-o phase2_threads requires a parameter\n"));
>  					phase2_threads = (int)strtol(val, NULL, 0);
>  					break;
> +				case BLOAD_LEAF_SLACK:
> +					if (!val)
> +						do_abort(
> +		_("-o debug_bload_leaf_slack requires a parameter\n"));
> +					bload_leaf_slack = (int)strtol(val, NULL, 0);
> +					break;
> +				case BLOAD_NODE_SLACK:
> +					if (!val)
> +						do_abort(
> +		_("-o debug_bload_node_slack requires a parameter\n"));
> +					bload_node_slack = (int)strtol(val, NULL, 0);
> +					break;
>  				default:
>  					unknown('o', val);
>  					break;
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-05-09 16:31 [PATCH v4 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
@ 2020-05-09 16:31 ` Darrick J. Wong
  2020-05-14 15:09   ` Brian Foster
  0 siblings, 1 reply; 25+ messages in thread
From: Darrick J. Wong @ 2020-05-09 16:31 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <darrick.wong@oracle.com>

Port the new btree staging context and related block reservation helper
code from the kernel to repair.  We'll use this in subsequent patches to
implement btree bulk loading.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 include/libxfs.h         |    1 
 libxfs/libxfs_api_defs.h |    2 
 repair/Makefile          |    4 -
 repair/bload.c           |  276 ++++++++++++++++++++++++++++++++++++++++++++++
 repair/bload.h           |   79 +++++++++++++
 repair/xfs_repair.c      |   17 +++
 6 files changed, 377 insertions(+), 2 deletions(-)
 create mode 100644 repair/bload.c
 create mode 100644 repair/bload.h


diff --git a/include/libxfs.h b/include/libxfs.h
index 12447835..b9370139 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -76,6 +76,7 @@ struct iomap;
 #include "xfs_rmap.h"
 #include "xfs_refcount_btree.h"
 #include "xfs_refcount.h"
+#include "xfs_btree_staging.h"
 
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index be06c763..61047f8f 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -27,12 +27,14 @@
 #define xfs_alloc_fix_freelist		libxfs_alloc_fix_freelist
 #define xfs_alloc_min_freelist		libxfs_alloc_min_freelist
 #define xfs_alloc_read_agf		libxfs_alloc_read_agf
+#define xfs_alloc_vextent		libxfs_alloc_vextent
 
 #define xfs_attr_get			libxfs_attr_get
 #define xfs_attr_leaf_newentsize	libxfs_attr_leaf_newentsize
 #define xfs_attr_namecheck		libxfs_attr_namecheck
 #define xfs_attr_set			libxfs_attr_set
 
+#define __xfs_bmap_add_free		__libxfs_bmap_add_free
 #define xfs_bmapi_read			libxfs_bmapi_read
 #define xfs_bmapi_write			libxfs_bmapi_write
 #define xfs_bmap_last_offset		libxfs_bmap_last_offset
diff --git a/repair/Makefile b/repair/Makefile
index 0964499a..8cc1ee68 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -9,11 +9,11 @@ LSRCFILES = README
 
 LTCOMMAND = xfs_repair
 
-HFILES = agheader.h attr_repair.h avl.h bmap.h btree.h \
+HFILES = agheader.h attr_repair.h avl.h bload.h bmap.h btree.h \
 	da_util.h dinode.h dir2.h err_protos.h globals.h incore.h protos.h \
 	rt.h progress.h scan.h versions.h prefetch.h rmap.h slab.h threads.h
 
-CFILES = agheader.c attr_repair.c avl.c bmap.c btree.c \
+CFILES = agheader.c attr_repair.c avl.c bload.c bmap.c btree.c \
 	da_util.c dino_chunks.c dinode.c dir2.c globals.c incore.c \
 	incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
 	phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
diff --git a/repair/bload.c b/repair/bload.c
new file mode 100644
index 00000000..ab05815c
--- /dev/null
+++ b/repair/bload.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include <libxfs.h>
+#include "bload.h"
+
+#define trace_xrep_newbt_claim_block(...)	((void) 0)
+#define trace_xrep_newbt_reserve_space(...)	((void) 0)
+#define trace_xrep_newbt_unreserve_space(...)	((void) 0)
+
+int bload_leaf_slack = -1;
+int bload_node_slack = -1;
+
+/* Ported routines from fs/xfs/scrub/repair.c */
+
+/*
+ * Roll a transaction, keeping the AG headers locked and reinitializing
+ * the btree cursors.
+ */
+int
+xrep_roll_ag_trans(
+	struct repair_ctx	*sc)
+{
+	int			error;
+
+	/* Keep the AG header buffers locked so we can keep going. */
+	if (sc->agi_bp)
+		libxfs_trans_bhold(sc->tp, sc->agi_bp);
+	if (sc->agf_bp)
+		libxfs_trans_bhold(sc->tp, sc->agf_bp);
+	if (sc->agfl_bp)
+		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
+
+	/*
+	 * Roll the transaction.  We still own the buffer and the buffer lock
+	 * regardless of whether or not the roll succeeds.  If the roll fails,
+	 * the buffers will be released during teardown on our way out of the
+	 * kernel.  If it succeeds, we join them to the new transaction and
+	 * move on.
+	 */
+	error = -libxfs_trans_roll(&sc->tp);
+	if (error)
+		return error;
+
+	/* Join AG headers to the new transaction. */
+	if (sc->agi_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
+	if (sc->agf_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
+	if (sc->agfl_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
+
+	return 0;
+}
+
+/* Initialize accounting resources for staging a new AG btree. */
+void
+xrep_newbt_init_ag(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc,
+	const struct xfs_owner_info	*oinfo,
+	xfs_fsblock_t			alloc_hint,
+	enum xfs_ag_resv_type		resv)
+{
+	memset(xnr, 0, sizeof(struct xrep_newbt));
+	xnr->sc = sc;
+	xnr->oinfo = *oinfo; /* structure copy */
+	xnr->alloc_hint = alloc_hint;
+	xnr->resv = resv;
+	INIT_LIST_HEAD(&xnr->reservations);
+}
+
+/* Initialize accounting resources for staging a new inode fork btree. */
+void
+xrep_newbt_init_inode(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc,
+	int				whichfork,
+	const struct xfs_owner_info	*oinfo)
+{
+	memset(xnr, 0, sizeof(struct xrep_newbt));
+	xnr->sc = sc;
+	xnr->oinfo = *oinfo; /* structure copy */
+	xnr->alloc_hint = XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino);
+	xnr->resv = XFS_AG_RESV_NONE;
+	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
+	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
+	INIT_LIST_HEAD(&xnr->reservations);
+}
+
+/*
+ * Initialize accounting resources for staging a new btree.  Callers are
+ * expected to add their own reservations (and clean them up) manually.
+ */
+void
+xrep_newbt_init_bare(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc)
+{
+	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
+			XFS_AG_RESV_NONE);
+}
+
+/* Add a space reservation manually. */
+int
+xrep_newbt_add_reservation(
+	struct xrep_newbt		*xnr,
+	xfs_fsblock_t			fsbno,
+	xfs_extlen_t			len,
+	void				*priv)
+{
+	struct xrep_newbt_resv	*resv;
+
+	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
+	if (!resv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&resv->list);
+	resv->fsbno = fsbno;
+	resv->len = len;
+	resv->used = 0;
+	resv->priv = priv;
+	list_add_tail(&resv->list, &xnr->reservations);
+	return 0;
+}
+
+/* Reserve disk space for our new btree. */
+int
+xrep_newbt_reserve_space(
+	struct xrep_newbt	*xnr,
+	uint64_t		nr_blocks)
+{
+	struct repair_ctx	*sc = xnr->sc;
+	xfs_alloctype_t		type;
+	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
+	int			error = 0;
+
+	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
+
+	while (nr_blocks > 0 && !error) {
+		struct xfs_alloc_arg	args = {
+			.tp		= sc->tp,
+			.mp		= sc->mp,
+			.type		= type,
+			.fsbno		= alloc_hint,
+			.oinfo		= xnr->oinfo,
+			.minlen		= 1,
+			.maxlen		= nr_blocks,
+			.prod		= 1,
+			.resv		= xnr->resv,
+		};
+
+		error = -libxfs_alloc_vextent(&args);
+		if (error)
+			return error;
+		if (args.fsbno == NULLFSBLOCK)
+			return -ENOSPC;
+
+		trace_xrep_newbt_reserve_space(sc->mp,
+				XFS_FSB_TO_AGNO(sc->mp, args.fsbno),
+				XFS_FSB_TO_AGBNO(sc->mp, args.fsbno),
+				args.len, xnr->oinfo.oi_owner);
+
+		/* We don't have real EFIs here so skip that. */
+
+		error = xrep_newbt_add_reservation(xnr, args.fsbno, args.len,
+				NULL);
+		if (error)
+			break;
+
+		nr_blocks -= args.len;
+		alloc_hint = args.fsbno + args.len - 1;
+
+		if (sc->ip)
+			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
+		else
+			error = xrep_roll_ag_trans(sc);
+	}
+
+	return error;
+}
+
+/* Free all the accounting info and disk space we reserved for a new btree. */
+void
+xrep_newbt_destroy(
+	struct xrep_newbt	*xnr,
+	int			error)
+{
+	struct repair_ctx	*sc = xnr->sc;
+	struct xrep_newbt_resv	*resv, *n;
+
+	if (error)
+		goto junkit;
+
+	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
+		/* We don't have EFIs here so skip the EFD. */
+
+		/* Free every block we didn't use. */
+		resv->fsbno += resv->used;
+		resv->len -= resv->used;
+		resv->used = 0;
+
+		if (resv->len > 0) {
+			trace_xrep_newbt_unreserve_space(sc->mp,
+					XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
+					XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
+					resv->len, xnr->oinfo.oi_owner);
+
+			__libxfs_bmap_add_free(sc->tp, resv->fsbno, resv->len,
+					&xnr->oinfo, true);
+		}
+
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
+
+junkit:
+	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
+
+	if (sc->ip) {
+		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
+		xnr->ifake.if_fork = NULL;
+	}
+}
+
+/* Feed one of the reserved btree blocks to the bulk loader. */
+int
+xrep_newbt_claim_block(
+	struct xfs_btree_cur	*cur,
+	struct xrep_newbt	*xnr,
+	union xfs_btree_ptr	*ptr)
+{
+	struct xrep_newbt_resv	*resv;
+	xfs_fsblock_t		fsb;
+
+	/*
+	 * If last_resv doesn't have a block for us, move forward until we find
+	 * one that does (or run out of reservations).
+	 */
+	if (xnr->last_resv == NULL) {
+		list_for_each_entry(resv, &xnr->reservations, list) {
+			if (resv->used < resv->len) {
+				xnr->last_resv = resv;
+				break;
+			}
+		}
+		if (xnr->last_resv == NULL)
+			return -ENOSPC;
+	} else if (xnr->last_resv->used == xnr->last_resv->len) {
+		if (xnr->last_resv->list.next == &xnr->reservations)
+			return -ENOSPC;
+		xnr->last_resv = list_entry(xnr->last_resv->list.next,
+				struct xrep_newbt_resv, list);
+	}
+
+	/* Nab the block. */
+	fsb = xnr->last_resv->fsbno + xnr->last_resv->used;
+	xnr->last_resv->used++;
+
+	trace_xrep_newbt_claim_block(cur->bc_mp,
+			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
+			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
+			xnr->oinfo.oi_owner);
+
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		ptr->l = cpu_to_be64(fsb);
+	else
+		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
+	return 0;
+}
diff --git a/repair/bload.h b/repair/bload.h
new file mode 100644
index 00000000..ba5f6d0b
--- /dev/null
+++ b/repair/bload.h
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_REPAIR_BLOAD_H__
+#define __XFS_REPAIR_BLOAD_H__
+
+extern int bload_leaf_slack;
+extern int bload_node_slack;
+
+struct repair_ctx {
+	struct xfs_mount	*mp;
+	struct xfs_inode	*ip;
+	struct xfs_trans	*tp;
+
+	struct xfs_buf		*agi_bp;
+	struct xfs_buf		*agf_bp;
+	struct xfs_buf		*agfl_bp;
+};
+
+struct xrep_newbt_resv {
+	/* Link to list of extents that we've reserved. */
+	struct list_head	list;
+
+	void			*priv;
+
+	/* FSB of the block we reserved. */
+	xfs_fsblock_t		fsbno;
+
+	/* Length of the reservation. */
+	xfs_extlen_t		len;
+
+	/* How much of this reservation we've used. */
+	xfs_extlen_t		used;
+};
+
+struct xrep_newbt {
+	struct repair_ctx	*sc;
+
+	/* List of extents that we've reserved. */
+	struct list_head	reservations;
+
+	/* Fake root for new btree. */
+	union {
+		struct xbtree_afakeroot	afake;
+		struct xbtree_ifakeroot	ifake;
+	};
+
+	/* rmap owner of these blocks */
+	struct xfs_owner_info	oinfo;
+
+	/* The last reservation we allocated from. */
+	struct xrep_newbt_resv	*last_resv;
+
+	/* Allocation hint */
+	xfs_fsblock_t		alloc_hint;
+
+	/* per-ag reservation type */
+	enum xfs_ag_resv_type	resv;
+};
+
+#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
+	list_for_each_entry_safe((resv), (n), &(xnr)->reservations, list)
+
+void xrep_newbt_init_bare(struct xrep_newbt *xba, struct repair_ctx *sc);
+void xrep_newbt_init_ag(struct xrep_newbt *xba, struct repair_ctx *sc,
+		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
+		enum xfs_ag_resv_type resv);
+void xrep_newbt_init_inode(struct xrep_newbt *xba, struct repair_ctx *sc,
+		int whichfork, const struct xfs_owner_info *oinfo);
+int xrep_newbt_add_reservation(struct xrep_newbt *xba, xfs_fsblock_t fsbno,
+		xfs_extlen_t len, void *priv);
+int xrep_newbt_reserve_space(struct xrep_newbt *xba, uint64_t nr_blocks);
+void xrep_newbt_destroy(struct xrep_newbt *xba, int error);
+int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xba,
+		union xfs_btree_ptr *ptr);
+
+#endif /* __XFS_REPAIR_BLOAD_H__ */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 9d72fa8e..8fbd3649 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -24,6 +24,7 @@
 #include "rmap.h"
 #include "libfrog/fsgeom.h"
 #include "libfrog/platform.h"
+#include "bload.h"
 
 /*
  * option tables for getsubopt calls
@@ -39,6 +40,8 @@ enum o_opt_nums {
 	AG_STRIDE,
 	FORCE_GEO,
 	PHASE2_THREADS,
+	BLOAD_LEAF_SLACK,
+	BLOAD_NODE_SLACK,
 	O_MAX_OPTS,
 };
 
@@ -49,6 +52,8 @@ static char *o_opts[] = {
 	[AG_STRIDE]		= "ag_stride",
 	[FORCE_GEO]		= "force_geometry",
 	[PHASE2_THREADS]	= "phase2_threads",
+	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
+	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
 	[O_MAX_OPTS]		= NULL,
 };
 
@@ -260,6 +265,18 @@ process_args(int argc, char **argv)
 		_("-o phase2_threads requires a parameter\n"));
 					phase2_threads = (int)strtol(val, NULL, 0);
 					break;
+				case BLOAD_LEAF_SLACK:
+					if (!val)
+						do_abort(
+		_("-o debug_bload_leaf_slack requires a parameter\n"));
+					bload_leaf_slack = (int)strtol(val, NULL, 0);
+					break;
+				case BLOAD_NODE_SLACK:
+					if (!val)
+						do_abort(
+		_("-o debug_bload_node_slack requires a parameter\n"));
+					bload_node_slack = (int)strtol(val, NULL, 0);
+					break;
 				default:
 					unknown('o', val);
 					break;
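
For reference, the intended calling sequence looks roughly like the
sketch below.  Only the xrep_newbt_* calls and struct repair_ctx come
from the patch; make_btree_cursor() and the block count are
hypothetical, and error handling is abbreviated:

	static int
	rebuild_one_ag_btree(
		struct repair_ctx	*sc,
		uint64_t		nr_blocks)
	{
		struct xrep_newbt	xnr;
		union xfs_btree_ptr	ptr;
		struct xfs_btree_cur	*cur;
		int			error;

		/* Stage the new btree and reserve all of its blocks up front. */
		xrep_newbt_init_ag(&xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER,
				NULLFSBLOCK, XFS_AG_RESV_NONE);
		error = xrep_newbt_reserve_space(&xnr, nr_blocks);
		if (error)
			goto out;

		cur = make_btree_cursor(sc);	/* hypothetical helper */

		/*
		 * The bulk loader claims one reserved block for each btree
		 * block that it formats.
		 */
		while (nr_blocks-- > 0) {
			error = xrep_newbt_claim_block(cur, &xnr, &ptr);
			if (error)
				break;
		}

	out:
		/* Frees unclaimed blocks on success; on error, just the accounting. */
		xrep_newbt_destroy(&xnr, error);
		return error;
	}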


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2020-03-04  3:29 [PATCH v3 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
@ 2020-03-04  3:29 ` Darrick J. Wong
  0 siblings, 0 replies; 25+ messages in thread
From: Darrick J. Wong @ 2020-03-04  3:29 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <darrick.wong@oracle.com>

Port the new btree staging context and related block reservation helper
code from the kernel to repair.  We'll use this in subsequent patches to
implement btree bulk loading.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/libxfs_api_defs.h |    2 
 repair/Makefile          |    4 -
 repair/bload.c           |  276 ++++++++++++++++++++++++++++++++++++++++++++++
 repair/bload.h           |   79 +++++++++++++
 repair/xfs_repair.c      |   17 +++
 5 files changed, 376 insertions(+), 2 deletions(-)
 create mode 100644 repair/bload.c
 create mode 100644 repair/bload.h


diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 1149e301..8aefd342 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -28,6 +28,7 @@
 #define xfs_alloc_fix_freelist		libxfs_alloc_fix_freelist
 #define xfs_alloc_min_freelist		libxfs_alloc_min_freelist
 #define xfs_alloc_read_agf		libxfs_alloc_read_agf
+#define xfs_alloc_vextent		libxfs_alloc_vextent
 
 #define xfs_attr_get			libxfs_attr_get
 #define xfs_attr_leaf_newentsize	libxfs_attr_leaf_newentsize
@@ -35,6 +36,7 @@
 #define xfs_attr_remove			libxfs_attr_remove
 #define xfs_attr_set			libxfs_attr_set
 
+#define __xfs_bmap_add_free		__libxfs_bmap_add_free
 #define xfs_bmapi_read			libxfs_bmapi_read
 #define xfs_bmapi_write			libxfs_bmapi_write
 #define xfs_bmap_last_offset		libxfs_bmap_last_offset
diff --git a/repair/Makefile b/repair/Makefile
index 0964499a..8cc1ee68 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -9,11 +9,11 @@ LSRCFILES = README
 
 LTCOMMAND = xfs_repair
 
-HFILES = agheader.h attr_repair.h avl.h bmap.h btree.h \
+HFILES = agheader.h attr_repair.h avl.h bload.h bmap.h btree.h \
 	da_util.h dinode.h dir2.h err_protos.h globals.h incore.h protos.h \
 	rt.h progress.h scan.h versions.h prefetch.h rmap.h slab.h threads.h
 
-CFILES = agheader.c attr_repair.c avl.c bmap.c btree.c \
+CFILES = agheader.c attr_repair.c avl.c bload.c bmap.c btree.c \
 	da_util.c dino_chunks.c dinode.c dir2.c globals.c incore.c \
 	incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
 	phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
diff --git a/repair/bload.c b/repair/bload.c
new file mode 100644
index 00000000..7aaadd1d
--- /dev/null
+++ b/repair/bload.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include <libxfs.h>
+#include "bload.h"
+
+#define trace_xrep_newbt_alloc_block(...)	((void) 0)
+#define trace_xrep_newbt_reserve_space(...)	((void) 0)
+#define trace_xrep_newbt_unreserve_space(...)	((void) 0)
+
+int bload_leaf_slack = -1;
+int bload_node_slack = -1;
+
+/* Ported routines from fs/xfs/scrub/repair.c */
+
+/*
+ * Roll a transaction, keeping the AG headers locked and reinitializing
+ * the btree cursors.
+ */
+int
+xrep_roll_ag_trans(
+	struct repair_ctx	*sc)
+{
+	int			error;
+
+	/* Keep the AG header buffers locked so we can keep going. */
+	if (sc->agi_bp)
+		libxfs_trans_bhold(sc->tp, sc->agi_bp);
+	if (sc->agf_bp)
+		libxfs_trans_bhold(sc->tp, sc->agf_bp);
+	if (sc->agfl_bp)
+		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
+
+	/*
+	 * Roll the transaction.  We still own the buffer and the buffer lock
+	 * regardless of whether or not the roll succeeds.  If the roll fails,
+	 * the buffers will be released during teardown on our way out of the
+	 * kernel.  If it succeeds, we join them to the new transaction and
+	 * move on.
+	 */
+	error = -libxfs_trans_roll(&sc->tp);
+	if (error)
+		return error;
+
+	/* Join AG headers to the new transaction. */
+	if (sc->agi_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
+	if (sc->agf_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
+	if (sc->agfl_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
+
+	return 0;
+}
+
+/* Initialize accounting resources for staging a new AG btree. */
+void
+xrep_newbt_init_ag(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc,
+	const struct xfs_owner_info	*oinfo,
+	xfs_fsblock_t			alloc_hint,
+	enum xfs_ag_resv_type		resv)
+{
+	memset(xnr, 0, sizeof(struct xrep_newbt));
+	xnr->sc = sc;
+	xnr->oinfo = *oinfo; /* structure copy */
+	xnr->alloc_hint = alloc_hint;
+	xnr->resv = resv;
+	INIT_LIST_HEAD(&xnr->reservations);
+}
+
+/* Initialize accounting resources for staging a new inode fork btree. */
+void
+xrep_newbt_init_inode(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc,
+	int				whichfork,
+	const struct xfs_owner_info	*oinfo)
+{
+	memset(xnr, 0, sizeof(struct xrep_newbt));
+	xnr->sc = sc;
+	xnr->oinfo = *oinfo; /* structure copy */
+	xnr->alloc_hint = XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino);
+	xnr->resv = XFS_AG_RESV_NONE;
+	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
+	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
+	INIT_LIST_HEAD(&xnr->reservations);
+}
+
+/*
+ * Initialize accounting resources for staging a new btree.  Callers are
+ * expected to add their own reservations (and clean them up) manually.
+ */
+void
+xrep_newbt_init_bare(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc)
+{
+	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
+			XFS_AG_RESV_NONE);
+}
+
+/* Add a space reservation manually. */
+int
+xrep_newbt_add_reservation(
+	struct xrep_newbt		*xnr,
+	xfs_fsblock_t			fsbno,
+	xfs_extlen_t			len,
+	void				*priv)
+{
+	struct xrep_newbt_resv	*resv;
+
+	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
+	if (!resv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&resv->list);
+	resv->fsbno = fsbno;
+	resv->len = len;
+	resv->used = 0;
+	resv->priv = priv;
+	list_add_tail(&resv->list, &xnr->reservations);
+	return 0;
+}
+
+/* Reserve disk space for our new btree. */
+int
+xrep_newbt_reserve_space(
+	struct xrep_newbt	*xnr,
+	uint64_t		nr_blocks)
+{
+	struct repair_ctx	*sc = xnr->sc;
+	xfs_alloctype_t		type;
+	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
+	int			error = 0;
+
+	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
+
+	while (nr_blocks > 0 && !error) {
+		struct xfs_alloc_arg	args = {
+			.tp		= sc->tp,
+			.mp		= sc->mp,
+			.type		= type,
+			.fsbno		= alloc_hint,
+			.oinfo		= xnr->oinfo,
+			.minlen		= 1,
+			.maxlen		= nr_blocks,
+			.prod		= 1,
+			.resv		= xnr->resv,
+		};
+
+		error = -libxfs_alloc_vextent(&args);
+		if (error)
+			return error;
+		if (args.fsbno == NULLFSBLOCK)
+			return -ENOSPC;
+
+		trace_xrep_newbt_reserve_space(sc->mp,
+				XFS_FSB_TO_AGNO(sc->mp, args.fsbno),
+				XFS_FSB_TO_AGBNO(sc->mp, args.fsbno),
+				args.len, xnr->oinfo.oi_owner);
+
+		/* We don't have real EFIs here so skip that. */
+
+		error = xrep_newbt_add_reservation(xnr, args.fsbno, args.len,
+				NULL);
+		if (error)
+			break;
+
+		nr_blocks -= args.len;
+		alloc_hint = args.fsbno + args.len - 1;
+
+		if (sc->ip)
+			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
+		else
+			error = xrep_roll_ag_trans(sc);
+	}
+
+	return error;
+}
+
+/* Free all the accounting info and disk space we reserved for a new btree. */
+void
+xrep_newbt_destroy(
+	struct xrep_newbt	*xnr,
+	int			error)
+{
+	struct repair_ctx	*sc = xnr->sc;
+	struct xrep_newbt_resv	*resv, *n;
+
+	if (error)
+		goto junkit;
+
+	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
+		/* We don't have EFIs here so skip the EFD. */
+
+		/* Free every block we didn't use. */
+		resv->fsbno += resv->used;
+		resv->len -= resv->used;
+		resv->used = 0;
+
+		if (resv->len > 0) {
+			trace_xrep_newbt_unreserve_space(sc->mp,
+					XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
+					XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
+					resv->len, xnr->oinfo.oi_owner);
+
+			__libxfs_bmap_add_free(sc->tp, resv->fsbno, resv->len,
+					&xnr->oinfo, true);
+		}
+
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
+
+junkit:
+	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
+
+	if (sc->ip) {
+		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
+		xnr->ifake.if_fork = NULL;
+	}
+}
+
+/* Feed one of the reserved btree blocks to the bulk loader. */
+int
+xrep_newbt_alloc_block(
+	struct xfs_btree_cur	*cur,
+	struct xrep_newbt	*xnr,
+	union xfs_btree_ptr	*ptr)
+{
+	struct xrep_newbt_resv	*resv;
+	xfs_fsblock_t		fsb;
+
+	/*
+	 * If last_resv doesn't have a block for us, move forward until we find
+	 * one that does (or run out of reservations).
+	 */
+	if (xnr->last_resv == NULL) {
+		list_for_each_entry(resv, &xnr->reservations, list) {
+			if (resv->used < resv->len) {
+				xnr->last_resv = resv;
+				break;
+			}
+		}
+		if (xnr->last_resv == NULL)
+			return -ENOSPC;
+	} else if (xnr->last_resv->used == xnr->last_resv->len) {
+		if (xnr->last_resv->list.next == &xnr->reservations)
+			return -ENOSPC;
+		xnr->last_resv = list_entry(xnr->last_resv->list.next,
+				struct xrep_newbt_resv, list);
+	}
+
+	/* Nab the block. */
+	fsb = xnr->last_resv->fsbno + xnr->last_resv->used;
+	xnr->last_resv->used++;
+
+	trace_xrep_newbt_alloc_block(cur->bc_mp,
+			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
+			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
+			xnr->oinfo.oi_owner);
+
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		ptr->l = cpu_to_be64(fsb);
+	else
+		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
+	return 0;
+}
diff --git a/repair/bload.h b/repair/bload.h
new file mode 100644
index 00000000..7ffb8dcc
--- /dev/null
+++ b/repair/bload.h
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_REPAIR_BLOAD_H__
+#define __XFS_REPAIR_BLOAD_H__
+
+extern int bload_leaf_slack;
+extern int bload_node_slack;
+
+struct repair_ctx {
+	struct xfs_mount	*mp;
+	struct xfs_inode	*ip;
+	struct xfs_trans	*tp;
+
+	struct xfs_buf		*agi_bp;
+	struct xfs_buf		*agf_bp;
+	struct xfs_buf		*agfl_bp;
+};
+
+struct xrep_newbt_resv {
+	/* Link to list of extents that we've reserved. */
+	struct list_head	list;
+
+	void			*priv;
+
+	/* FSB of the block we reserved. */
+	xfs_fsblock_t		fsbno;
+
+	/* Length of the reservation. */
+	xfs_extlen_t		len;
+
+	/* How much of this reservation we've used. */
+	xfs_extlen_t		used;
+};
+
+struct xrep_newbt {
+	struct repair_ctx	*sc;
+
+	/* List of extents that we've reserved. */
+	struct list_head	reservations;
+
+	/* Fake root for new btree. */
+	union {
+		struct xbtree_afakeroot	afake;
+		struct xbtree_ifakeroot	ifake;
+	};
+
+	/* rmap owner of these blocks */
+	struct xfs_owner_info	oinfo;
+
+	/* The last reservation we allocated from. */
+	struct xrep_newbt_resv	*last_resv;
+
+	/* Allocation hint */
+	xfs_fsblock_t		alloc_hint;
+
+	/* per-ag reservation type */
+	enum xfs_ag_resv_type	resv;
+};
+
+#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
+	list_for_each_entry_safe((resv), (n), &(xnr)->reservations, list)
+
+void xrep_newbt_init_bare(struct xrep_newbt *xba, struct repair_ctx *sc);
+void xrep_newbt_init_ag(struct xrep_newbt *xba, struct repair_ctx *sc,
+		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
+		enum xfs_ag_resv_type resv);
+void xrep_newbt_init_inode(struct xrep_newbt *xba, struct repair_ctx *sc,
+		int whichfork, const struct xfs_owner_info *oinfo);
+int xrep_newbt_add_reservation(struct xrep_newbt *xba, xfs_fsblock_t fsbno,
+		xfs_extlen_t len, void *priv);
+int xrep_newbt_reserve_space(struct xrep_newbt *xba, uint64_t nr_blocks);
+void xrep_newbt_destroy(struct xrep_newbt *xba, int error);
+int xrep_newbt_alloc_block(struct xfs_btree_cur *cur, struct xrep_newbt *xba,
+		union xfs_btree_ptr *ptr);
+
+#endif /* __XFS_REPAIR_BLOAD_H__ */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 4d37ddc6..fd68aaee 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -24,6 +24,7 @@
 #include "rmap.h"
 #include "libfrog/fsgeom.h"
 #include "libfrog/platform.h"
+#include "bload.h"
 
 /*
  * option tables for getsubopt calls
@@ -39,6 +40,8 @@ enum o_opt_nums {
 	AG_STRIDE,
 	FORCE_GEO,
 	PHASE2_THREADS,
+	BLOAD_LEAF_SLACK,
+	BLOAD_NODE_SLACK,
 	O_MAX_OPTS,
 };
 
@@ -49,6 +52,8 @@ static char *o_opts[] = {
 	[AG_STRIDE]		= "ag_stride",
 	[FORCE_GEO]		= "force_geometry",
 	[PHASE2_THREADS]	= "phase2_threads",
+	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
+	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
 	[O_MAX_OPTS]		= NULL,
 };
 
@@ -260,6 +265,18 @@ process_args(int argc, char **argv)
 		_("-o phase2_threads requires a parameter\n"));
 					phase2_threads = (int)strtol(val, NULL, 0);
 					break;
+				case BLOAD_LEAF_SLACK:
+					if (!val)
+						do_abort(
+		_("-o debug_bload_leaf_slack requires a parameter\n"));
+					bload_leaf_slack = (int)strtol(val, NULL, 0);
+					break;
+				case BLOAD_NODE_SLACK:
+					if (!val)
+						do_abort(
+		_("-o debug_bload_node_slack requires a parameter\n"));
+					bload_node_slack = (int)strtol(val, NULL, 0);
+					break;
 				default:
 					unknown('o', val);
 					break;


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 1/9] xfs_repair: port the online repair newbt structure
  2019-10-29 23:45 [PATCH RFC 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
@ 2019-10-29 23:45 ` Darrick J. Wong
  0 siblings, 0 replies; 25+ messages in thread
From: Darrick J. Wong @ 2019-10-29 23:45 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Port the new btree staging context and related block reservation helper
code from the kernel to repair.  We'll use this in subsequent patches to
implement btree bulk loading.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/libxfs_api_defs.h |    2 
 repair/Makefile          |    4 -
 repair/bload.c           |  276 ++++++++++++++++++++++++++++++++++++++++++++++
 repair/bload.h           |   79 +++++++++++++
 repair/xfs_repair.c      |   17 +++
 5 files changed, 376 insertions(+), 2 deletions(-)
 create mode 100644 repair/bload.c
 create mode 100644 repair/bload.h


diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 645c9b1b..7631edf6 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -50,6 +50,8 @@
 #define xfs_attr_remove			libxfs_attr_remove
 #define xfs_attr_leaf_newentsize	libxfs_attr_leaf_newentsize
 
+#define xfs_alloc_vextent		libxfs_alloc_vextent
+#define __xfs_bmap_add_free		__libxfs_bmap_add_free
 #define xfs_agfl_walk			libxfs_agfl_walk
 #define xfs_alloc_fix_freelist		libxfs_alloc_fix_freelist
 #define xfs_alloc_min_freelist		libxfs_alloc_min_freelist
diff --git a/repair/Makefile b/repair/Makefile
index 0964499a..8cc1ee68 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -9,11 +9,11 @@ LSRCFILES = README
 
 LTCOMMAND = xfs_repair
 
-HFILES = agheader.h attr_repair.h avl.h bmap.h btree.h \
+HFILES = agheader.h attr_repair.h avl.h bload.h bmap.h btree.h \
 	da_util.h dinode.h dir2.h err_protos.h globals.h incore.h protos.h \
 	rt.h progress.h scan.h versions.h prefetch.h rmap.h slab.h threads.h
 
-CFILES = agheader.c attr_repair.c avl.c bmap.c btree.c \
+CFILES = agheader.c attr_repair.c avl.c bload.c bmap.c btree.c \
 	da_util.c dino_chunks.c dinode.c dir2.c globals.c incore.c \
 	incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
 	phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
diff --git a/repair/bload.c b/repair/bload.c
new file mode 100644
index 00000000..160a240a
--- /dev/null
+++ b/repair/bload.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include <libxfs.h>
+#include "bload.h"
+
+#define trace_xrep_newbt_alloc_block(...)	((void) 0)
+#define trace_xrep_newbt_reserve_space(...)	((void) 0)
+#define trace_xrep_newbt_unreserve_space(...)	((void) 0)
+
+int bload_leaf_slack = -1;
+int bload_node_slack = -1;
+
+/* Ported routines from fs/xfs/scrub/repair.c */
+
+/*
+ * Roll a transaction, keeping the AG headers locked and reinitializing
+ * the btree cursors.
+ */
+int
+xrep_roll_ag_trans(
+	struct repair_ctx	*sc)
+{
+	int			error;
+
+	/* Keep the AG header buffers locked so we can keep going. */
+	if (sc->agi_bp)
+		libxfs_trans_bhold(sc->tp, sc->agi_bp);
+	if (sc->agf_bp)
+		libxfs_trans_bhold(sc->tp, sc->agf_bp);
+	if (sc->agfl_bp)
+		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
+
+	/*
+	 * Roll the transaction.  We still own the buffer and the buffer lock
+	 * regardless of whether or not the roll succeeds.  If the roll fails,
+	 * the buffers will be released during teardown on our way out of the
+	 * kernel.  If it succeeds, we join them to the new transaction and
+	 * move on.
+	 */
+	error = -libxfs_trans_roll(&sc->tp);
+	if (error)
+		return error;
+
+	/* Join AG headers to the new transaction. */
+	if (sc->agi_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
+	if (sc->agf_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
+	if (sc->agfl_bp)
+		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
+
+	return 0;
+}
+
+/* Initialize accounting resources for staging a new AG btree. */
+void
+xrep_newbt_init_ag(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc,
+	const struct xfs_owner_info	*oinfo,
+	xfs_fsblock_t			alloc_hint,
+	enum xfs_ag_resv_type		resv)
+{
+	memset(xnr, 0, sizeof(struct xrep_newbt));
+	xnr->sc = sc;
+	xnr->oinfo = *oinfo; /* structure copy */
+	xnr->alloc_hint = alloc_hint;
+	xnr->resv = resv;
+	INIT_LIST_HEAD(&xnr->reservations);
+}
+
+/* Initialize accounting resources for staging a new inode fork btree. */
+void
+xrep_newbt_init_inode(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc,
+	int				whichfork,
+	const struct xfs_owner_info	*oinfo)
+{
+	memset(xnr, 0, sizeof(struct xrep_newbt));
+	xnr->sc = sc;
+	xnr->oinfo = *oinfo; /* structure copy */
+	xnr->alloc_hint = XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino);
+	xnr->resv = XFS_AG_RESV_NONE;
+	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
+	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
+	INIT_LIST_HEAD(&xnr->reservations);
+}
+
+/*
+ * Initialize accounting resources for staging a new btree.  Callers are
+ * expected to add their own reservations (and clean them up) manually.
+ */
+void
+xrep_newbt_init_bare(
+	struct xrep_newbt		*xnr,
+	struct repair_ctx		*sc)
+{
+	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
+			XFS_AG_RESV_NONE);
+}
+
+/* Add a space reservation manually. */
+int
+xrep_newbt_add_reservation(
+	struct xrep_newbt		*xnr,
+	xfs_fsblock_t			fsbno,
+	xfs_extlen_t			len,
+	void				*priv)
+{
+	struct xrep_newbt_resv	*resv;
+
+	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
+	if (!resv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&resv->list);
+	resv->fsbno = fsbno;
+	resv->len = len;
+	resv->used = 0;
+	resv->priv = priv;
+	list_add_tail(&resv->list, &xnr->reservations);
+	return 0;
+}
+
+/* Reserve disk space for our new btree. */
+int
+xrep_newbt_reserve_space(
+	struct xrep_newbt	*xnr,
+	uint64_t		nr_blocks)
+{
+	struct repair_ctx	*sc = xnr->sc;
+	xfs_alloctype_t		type;
+	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
+	int			error = 0;
+
+	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
+
+	while (nr_blocks > 0 && !error) {
+		struct xfs_alloc_arg	args = {
+			.tp		= sc->tp,
+			.mp		= sc->mp,
+			.type		= type,
+			.fsbno		= alloc_hint,
+			.oinfo		= xnr->oinfo,
+			.minlen		= 1,
+			.maxlen		= nr_blocks,
+			.prod		= nr_blocks,
+			.resv		= xnr->resv,
+		};
+
+		error = -libxfs_alloc_vextent(&args);
+		if (error)
+			return error;
+		if (args.fsbno == NULLFSBLOCK)
+			return -ENOSPC;
+
+		trace_xrep_newbt_reserve_space(sc->mp,
+				XFS_FSB_TO_AGNO(sc->mp, args.fsbno),
+				XFS_FSB_TO_AGBNO(sc->mp, args.fsbno),
+				args.len, xnr->oinfo.oi_owner);
+
+		/* We don't have real EFIs here so skip that. */
+
+		error = xrep_newbt_add_reservation(xnr, args.fsbno, args.len,
+				NULL);
+		if (error)
+			break;
+
+		nr_blocks -= args.len;
+		alloc_hint = args.fsbno + args.len - 1;
+
+		if (sc->ip)
+			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
+		else
+			error = xrep_roll_ag_trans(sc);
+	}
+
+	return error;
+}
+
+/* Free all the accounting info and disk space we reserved for a new btree. */
+void
+xrep_newbt_destroy(
+	struct xrep_newbt	*xnr,
+	int			error)
+{
+	struct repair_ctx	*sc = xnr->sc;
+	struct xrep_newbt_resv	*resv, *n;
+
+	if (error)
+		goto junkit;
+
+	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
+		/* We don't have EFIs here so skip the EFD. */
+
+		/* Free every block we didn't use. */
+		resv->fsbno += resv->used;
+		resv->len -= resv->used;
+		resv->used = 0;
+
+		if (resv->len > 0) {
+			trace_xrep_newbt_unreserve_space(sc->mp,
+					XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
+					XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
+					resv->len, xnr->oinfo.oi_owner);
+
+			__libxfs_bmap_add_free(sc->tp, resv->fsbno, resv->len,
+					&xnr->oinfo, true);
+		}
+
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
+
+junkit:
+	list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
+
+	if (sc->ip) {
+		kmem_zone_free(xfs_ifork_zone, xnr->ifake.if_fork);
+		xnr->ifake.if_fork = NULL;
+	}
+}
+
+/* Feed one of the reserved btree blocks to the bulk loader. */
+int
+xrep_newbt_alloc_block(
+	struct xfs_btree_cur	*cur,
+	struct xrep_newbt	*xnr,
+	union xfs_btree_ptr	*ptr)
+{
+	struct xrep_newbt_resv	*resv;
+	xfs_fsblock_t		fsb;
+
+	/*
+	 * If last_resv doesn't have a block for us, move forward until we find
+	 * one that does (or run out of reservations).
+	 */
+	if (xnr->last_resv == NULL) {
+		list_for_each_entry(resv, &xnr->reservations, list) {
+			if (resv->used < resv->len) {
+				xnr->last_resv = resv;
+				break;
+			}
+		}
+		if (xnr->last_resv == NULL)
+			return -ENOSPC;
+	} else if (xnr->last_resv->used == xnr->last_resv->len) {
+		if (xnr->last_resv->list.next == &xnr->reservations)
+			return -ENOSPC;
+		xnr->last_resv = list_entry(xnr->last_resv->list.next,
+				struct xrep_newbt_resv, list);
+	}
+
+	/* Nab the block. */
+	fsb = xnr->last_resv->fsbno + xnr->last_resv->used;
+	xnr->last_resv->used++;
+
+	trace_xrep_newbt_alloc_block(cur->bc_mp,
+			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
+			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
+			xnr->oinfo.oi_owner);
+
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		ptr->l = cpu_to_be64(fsb);
+	else
+		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
+	return 0;
+}
diff --git a/repair/bload.h b/repair/bload.h
new file mode 100644
index 00000000..8f890157
--- /dev/null
+++ b/repair/bload.h
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_REPAIR_BLOAD_H__
+#define __XFS_REPAIR_BLOAD_H__
+
+extern int bload_leaf_slack;
+extern int bload_node_slack;
+
+struct repair_ctx {
+	struct xfs_mount	*mp;
+	struct xfs_inode	*ip;
+	struct xfs_trans	*tp;
+
+	struct xfs_buf		*agi_bp;
+	struct xfs_buf		*agf_bp;
+	struct xfs_buf		*agfl_bp;
+};
+
+struct xrep_newbt_resv {
+	/* Link to list of extents that we've reserved. */
+	struct list_head	list;
+
+	void			*priv;
+
+	/* FSB of the block we reserved. */
+	xfs_fsblock_t		fsbno;
+
+	/* Length of the reservation. */
+	xfs_extlen_t		len;
+
+	/* How much of this reservation we've used. */
+	xfs_extlen_t		used;
+};
+
+struct xrep_newbt {
+	struct repair_ctx	*sc;
+
+	/* List of extents that we've reserved. */
+	struct list_head	reservations;
+
+	/* Fake root for new btree. */
+	union {
+		struct xbtree_afakeroot	afake;
+		struct xbtree_ifakeroot	ifake;
+	};
+
+	/* rmap owner of these blocks */
+	struct xfs_owner_info	oinfo;
+
+	/* The last reservation we allocated from. */
+	struct xrep_newbt_resv	*last_resv;
+
+	/* Allocation hint */
+	xfs_fsblock_t		alloc_hint;
+
+	/* per-ag reservation type */
+	enum xfs_ag_resv_type	resv;
+};
+
+#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
+	list_for_each_entry_safe((resv), (n), &(xnr)->reservations, list)
+
+void xrep_newbt_init_bare(struct xrep_newbt *xba, struct repair_ctx *sc);
+void xrep_newbt_init_ag(struct xrep_newbt *xba, struct repair_ctx *sc,
+		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
+		enum xfs_ag_resv_type resv);
+void xrep_newbt_init_inode(struct xrep_newbt *xba, struct repair_ctx *sc,
+		int whichfork, const struct xfs_owner_info *oinfo);
+int xrep_newbt_add_reservation(struct xrep_newbt *xba, xfs_fsblock_t fsbno,
+		xfs_extlen_t len, void *priv);
+int xrep_newbt_reserve_space(struct xrep_newbt *xba, uint64_t nr_blocks);
+void xrep_newbt_destroy(struct xrep_newbt *xba, int error);
+int xrep_newbt_alloc_block(struct xfs_btree_cur *cur, struct xrep_newbt *xba,
+		union xfs_btree_ptr *ptr);
+
+#endif /* __XFS_REPAIR_BLOAD_H__ */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 9295673d..ec8b615b 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -24,6 +24,7 @@
 #include "rmap.h"
 #include "libfrog/fsgeom.h"
 #include "libfrog/platform.h"
+#include "bload.h"
 
 /*
  * option tables for getsubopt calls
@@ -39,6 +40,8 @@ enum o_opt_nums {
 	AG_STRIDE,
 	FORCE_GEO,
 	PHASE2_THREADS,
+	BLOAD_LEAF_SLACK,
+	BLOAD_NODE_SLACK,
 	O_MAX_OPTS,
 };
 
@@ -49,6 +52,8 @@ static char *o_opts[] = {
 	[AG_STRIDE]		= "ag_stride",
 	[FORCE_GEO]		= "force_geometry",
 	[PHASE2_THREADS]	= "phase2_threads",
+	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
+	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
 	[O_MAX_OPTS]		= NULL,
 };
 
@@ -260,6 +265,18 @@ process_args(int argc, char **argv)
 		_("-o phase2_threads requires a parameter\n"));
 					phase2_threads = (int)strtol(val, NULL, 0);
 					break;
+				case BLOAD_LEAF_SLACK:
+					if (!val)
+						do_abort(
+		_("-o debug_bload_leaf_slack requires a parameter\n"));
+					bload_leaf_slack = (int)strtol(val, NULL, 0);
+					break;
+				case BLOAD_NODE_SLACK:
+					if (!val)
+						do_abort(
+		_("-o debug_bload_node_slack requires a parameter\n"));
+					bload_node_slack = (int)strtol(val, NULL, 0);
+					break;
 				default:
 					unknown('o', val);
 					break;


^ permalink raw reply related	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2020-06-02  0:12 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-01  1:21 [PATCH v2 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
2020-01-01  1:21 ` [PATCH 1/9] xfs_repair: port the online repair newbt structure Darrick J. Wong
2020-01-01  1:21 ` [PATCH 2/9] xfs_repair: unindent phase 5 function Darrick J. Wong
2020-01-01  1:21 ` [PATCH 3/9] xfs_repair: create a new class of btree rebuild cursors Darrick J. Wong
2020-01-01  1:21 ` [PATCH 4/9] xfs_repair: rebuild free space btrees with bulk loader Darrick J. Wong
2020-01-01  1:21 ` [PATCH 5/9] xfs_repair: rebuild inode " Darrick J. Wong
2020-01-01  1:22 ` [PATCH 6/9] xfs_repair: rebuild reverse mapping " Darrick J. Wong
2020-01-01  1:22 ` [PATCH 7/9] xfs_repair: rebuild refcount " Darrick J. Wong
2020-01-01  1:22 ` [PATCH 8/9] xfs_repair: remove old btree rebuild support code Darrick J. Wong
2020-01-01  1:22 ` [PATCH 9/9] xfs_repair: track blocks lost during btree construction via extents Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2020-05-20  1:50 [PATCH v5 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
2020-05-20  1:50 ` [PATCH 1/9] xfs_repair: port the online repair newbt structure Darrick J. Wong
2020-05-27 12:15   ` Brian Foster
2020-05-27 22:34     ` Darrick J. Wong
2020-05-28 15:08       ` Brian Foster
2020-05-29 21:01         ` Darrick J. Wong
2020-06-01 12:03           ` Brian Foster
2020-06-02  0:12             ` Darrick J. Wong
2020-05-09 16:31 [PATCH v4 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
2020-05-09 16:31 ` [PATCH 1/9] xfs_repair: port the online repair newbt structure Darrick J. Wong
2020-05-14 15:09   ` Brian Foster
2020-05-14 19:20     ` Darrick J. Wong
2020-05-15 11:41       ` Brian Foster
2020-05-15 18:52         ` Darrick J. Wong
2020-05-15 19:43           ` Brian Foster
2020-03-04  3:29 [PATCH v3 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
2020-03-04  3:29 ` [PATCH 1/9] xfs_repair: port the online repair newbt structure Darrick J. Wong
2019-10-29 23:45 [PATCH RFC 0/9] xfs_repair: use btree bulk loading Darrick J. Wong
2019-10-29 23:45 ` [PATCH 1/9] xfs_repair: port the online repair newbt structure Darrick J. Wong
