All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/9] xfsprogs: add a inode btree blocks counts to the AGI header
@ 2020-10-26 23:33 Darrick J. Wong
  2020-10-26 23:33 ` [PATCH 1/9] xfs: store inode btree block counts in " Darrick J. Wong
                   ` (8 more replies)
  0 siblings, 9 replies; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-26 23:33 UTC (permalink / raw)
  To: sandeen, darrick.wong, bfoster; +Cc: linux-xfs

Hi all,

Years ago, Christoph diagnosed a problem where freeing an inode on a
totally full filesystem could fail due to finobt expansion not being
able to allocate enough blocks.  He solved the problem by using the
per-AG block reservation system to ensure that there are always enough
blocks for finobt expansion, but that came at the cost of having to walk
the entire finobt at mount time.  This new feature solves that
performance regression by adding inode btree block counts to the AGI
header.

v2: rebase kernel to 5.9
v3: split logical changes into separate patches
v4: support inobtcounts && !finobt properly

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=inobt-counters

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=inobt-counters

fstests git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfstests-dev.git/log/?h=inobt-counters
---
 db/agi.c                  |    2 +
 db/sb.c                   |   78 ++++++++++++++++++++++++++++++++++++++++++++-
 db/xfs_admin.sh           |    4 ++
 libxfs/xfs_ag.c           |    5 +++
 libxfs/xfs_format.h       |   21 +++++++++++-
 libxfs/xfs_ialloc.c       |    1 +
 libxfs/xfs_ialloc_btree.c |   65 +++++++++++++++++++++++++++++++++++---
 man/man8/mkfs.xfs.8       |   15 +++++++++
 man/man8/xfs_admin.8      |   16 +++++++++
 mkfs/xfs_mkfs.c           |   34 +++++++++++++++++++-
 repair/phase5.c           |    5 +++
 repair/scan.c             |   38 ++++++++++++++++++++--
 12 files changed, 272 insertions(+), 12 deletions(-)


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/9] xfs: store inode btree block counts in AGI header
  2020-10-26 23:33 [PATCH v4 0/9] xfsprogs: add a inode btree blocks counts to the AGI header Darrick J. Wong
@ 2020-10-26 23:33 ` Darrick J. Wong
  2020-10-26 23:33 ` [PATCH 2/9] xfs: use the finobt block counts to speed up mount times Darrick J. Wong
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-26 23:33 UTC (permalink / raw)
  To: sandeen, darrick.wong, bfoster; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Source kernel commit: 2a39946c984464e4aac82c556ba9915589be7323

Add a btree block usage counters for both inode btrees to the AGI header
so that we don't have to walk the entire finobt at mount time to create
the per-AG reservations.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/xfs_ag.c           |    5 +++++
 libxfs/xfs_format.h       |   18 +++++++++++++++++-
 libxfs/xfs_ialloc.c       |    1 +
 libxfs/xfs_ialloc_btree.c |   21 +++++++++++++++++++++
 4 files changed, 44 insertions(+), 1 deletion(-)


diff --git a/libxfs/xfs_ag.c b/libxfs/xfs_ag.c
index ceaa7e5e4640..af8a0afdda93 100644
--- a/libxfs/xfs_ag.c
+++ b/libxfs/xfs_ag.c
@@ -333,6 +333,11 @@ xfs_agiblock_init(
 	}
 	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
 		agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
+	if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+		agi->agi_iblocks = cpu_to_be32(1);
+		if (xfs_sb_version_hasfinobt(&mp->m_sb))
+			agi->agi_fblocks = cpu_to_be32(1);
+	}
 }
 
 typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp,
diff --git a/libxfs/xfs_format.h b/libxfs/xfs_format.h
index f0ecd064ff14..01544f9d5a57 100644
--- a/libxfs/xfs_format.h
+++ b/libxfs/xfs_format.h
@@ -449,6 +449,7 @@ xfs_sb_has_compat_feature(
 #define XFS_SB_FEAT_RO_COMPAT_FINOBT   (1 << 0)		/* free inode btree */
 #define XFS_SB_FEAT_RO_COMPAT_RMAPBT   (1 << 1)		/* reverse map btree */
 #define XFS_SB_FEAT_RO_COMPAT_REFLINK  (1 << 2)		/* reflinked files */
+#define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3)		/* inobt block counts */
 #define XFS_SB_FEAT_RO_COMPAT_ALL \
 		(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
 		 XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
@@ -563,6 +564,17 @@ static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp)
 		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK);
 }
 
+/*
+ * Inode btree block counter.  We record the number of inobt and finobt blocks
+ * in the AGI header so that we can skip the finobt walk at mount time when
+ * setting up per-AG reservations.
+ */
+static inline bool xfs_sb_version_hasinobtcounts(struct xfs_sb *sbp)
+{
+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT);
+}
+
 /*
  * end of superblock version macros
  */
@@ -765,6 +777,9 @@ typedef struct xfs_agi {
 	__be32		agi_free_root; /* root of the free inode btree */
 	__be32		agi_free_level;/* levels in free inode btree */
 
+	__be32		agi_iblocks;	/* inobt blocks used */
+	__be32		agi_fblocks;	/* finobt blocks used */
+
 	/* structure must be padded to 64 bit alignment */
 } xfs_agi_t;
 
@@ -785,7 +800,8 @@ typedef struct xfs_agi {
 #define	XFS_AGI_ALL_BITS_R1	((1 << XFS_AGI_NUM_BITS_R1) - 1)
 #define	XFS_AGI_FREE_ROOT	(1 << 11)
 #define	XFS_AGI_FREE_LEVEL	(1 << 12)
-#define	XFS_AGI_NUM_BITS_R2	13
+#define	XFS_AGI_IBLOCKS		(1 << 13) /* both inobt/finobt block counters */
+#define	XFS_AGI_NUM_BITS_R2	14
 
 /* disk block (xfs_daddr_t) in the AG */
 #define XFS_AGI_DADDR(mp)	((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c
index ce73feed981c..8adeb10e5d2e 100644
--- a/libxfs/xfs_ialloc.c
+++ b/libxfs/xfs_ialloc.c
@@ -2468,6 +2468,7 @@ xfs_ialloc_log_agi(
 		offsetof(xfs_agi_t, agi_unlinked),
 		offsetof(xfs_agi_t, agi_free_root),
 		offsetof(xfs_agi_t, agi_free_level),
+		offsetof(xfs_agi_t, agi_iblocks),
 		sizeof(xfs_agi_t)
 	};
 #ifdef DEBUG
diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c
index 41c308c2e54c..3ed87d1c7251 100644
--- a/libxfs/xfs_ialloc_btree.c
+++ b/libxfs/xfs_ialloc_btree.c
@@ -66,6 +66,25 @@ xfs_finobt_set_root(
 			   XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL);
 }
 
+/* Update the inode btree block counter for this btree. */
+static inline void
+xfs_inobt_mod_blockcount(
+	struct xfs_btree_cur	*cur,
+	int			howmuch)
+{
+	struct xfs_buf		*agbp = cur->bc_ag.agbp;
+	struct xfs_agi		*agi = agbp->b_addr;
+
+	if (!xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb))
+		return;
+
+	if (cur->bc_btnum == XFS_BTNUM_FINO)
+		be32_add_cpu(&agi->agi_fblocks, howmuch);
+	else if (cur->bc_btnum == XFS_BTNUM_INO)
+		be32_add_cpu(&agi->agi_iblocks, howmuch);
+	xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_IBLOCKS);
+}
+
 STATIC int
 __xfs_inobt_alloc_block(
 	struct xfs_btree_cur	*cur,
@@ -101,6 +120,7 @@ __xfs_inobt_alloc_block(
 
 	new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
 	*stat = 1;
+	xfs_inobt_mod_blockcount(cur, 1);
 	return 0;
 }
 
@@ -133,6 +153,7 @@ __xfs_inobt_free_block(
 	struct xfs_buf		*bp,
 	enum xfs_ag_resv_type	resv)
 {
+	xfs_inobt_mod_blockcount(cur, -1);
 	return xfs_free_extent(cur->bc_tp,
 			XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
 			&XFS_RMAP_OINFO_INOBT, resv);


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/9] xfs: use the finobt block counts to speed up mount times
  2020-10-26 23:33 [PATCH v4 0/9] xfsprogs: add a inode btree blocks counts to the AGI header Darrick J. Wong
  2020-10-26 23:33 ` [PATCH 1/9] xfs: store inode btree block counts in " Darrick J. Wong
@ 2020-10-26 23:33 ` Darrick J. Wong
  2020-10-26 23:33 ` [PATCH 3/9] xfs: support inode btree blockcounts in online repair Darrick J. Wong
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-26 23:33 UTC (permalink / raw)
  To: sandeen, darrick.wong, bfoster; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Source kernel commit: 1ac35f061af011442eeb731632f6daae991ecf7c

Now that we have reliable finobt block counts, use them to speed up the
per-AG block reservation calculations at mount time.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/xfs_ialloc_btree.c |   28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)


diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c
index 3ed87d1c7251..1edf69ffa96b 100644
--- a/libxfs/xfs_ialloc_btree.c
+++ b/libxfs/xfs_ialloc_btree.c
@@ -693,6 +693,28 @@ xfs_inobt_count_blocks(
 	return error;
 }
 
+/* Read finobt block count from AGI header. */
+static int
+xfs_finobt_read_blocks(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	xfs_agnumber_t		agno,
+	xfs_extlen_t		*tree_blocks)
+{
+	struct xfs_buf		*agbp;
+	struct xfs_agi		*agi;
+	int			error;
+
+	error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+	if (error)
+		return error;
+
+	agi = agbp->b_addr;
+	*tree_blocks = be32_to_cpu(agi->agi_fblocks);
+	xfs_trans_brelse(tp, agbp);
+	return 0;
+}
+
 /*
  * Figure out how many blocks to reserve and how many are used by this btree.
  */
@@ -710,7 +732,11 @@ xfs_finobt_calc_reserves(
 	if (!xfs_sb_version_hasfinobt(&mp->m_sb))
 		return 0;
 
-	error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO, &tree_len);
+	if (xfs_sb_version_hasinobtcounts(&mp->m_sb))
+		error = xfs_finobt_read_blocks(mp, tp, agno, &tree_len);
+	else
+		error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO,
+				&tree_len);
 	if (error)
 		return error;
 


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 3/9] xfs: support inode btree blockcounts in online repair
  2020-10-26 23:33 [PATCH v4 0/9] xfsprogs: add a inode btree blocks counts to the AGI header Darrick J. Wong
  2020-10-26 23:33 ` [PATCH 1/9] xfs: store inode btree block counts in " Darrick J. Wong
  2020-10-26 23:33 ` [PATCH 2/9] xfs: use the finobt block counts to speed up mount times Darrick J. Wong
@ 2020-10-26 23:33 ` Darrick J. Wong
  2020-10-26 23:33 ` [PATCH 4/9] xfs_db: support displaying inode btree block counts in AGI header Darrick J. Wong
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-26 23:33 UTC (permalink / raw)
  To: sandeen, darrick.wong, bfoster; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Source kernel commit: 11f744234f052922db4ed77dad35862b3d3164cf

Add the necessary bits to the online repair code to support logging the
inode btree counters when rebuilding the btrees, and to support fixing
the counters when rebuilding the AGI.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/xfs_ialloc_btree.c |   16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)


diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c
index 1edf69ffa96b..953417151820 100644
--- a/libxfs/xfs_ialloc_btree.c
+++ b/libxfs/xfs_ialloc_btree.c
@@ -500,19 +500,29 @@ xfs_inobt_commit_staged_btree(
 {
 	struct xfs_agi		*agi = agbp->b_addr;
 	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;
+	int			fields;
 
 	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
 
 	if (cur->bc_btnum == XFS_BTNUM_INO) {
+		fields = XFS_AGI_ROOT | XFS_AGI_LEVEL;
 		agi->agi_root = cpu_to_be32(afake->af_root);
 		agi->agi_level = cpu_to_be32(afake->af_levels);
-		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
+		if (xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) {
+			agi->agi_iblocks = cpu_to_be32(afake->af_blocks);
+			fields |= XFS_AGI_IBLOCKS;
+		}
+		xfs_ialloc_log_agi(tp, agbp, fields);
 		xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_inobt_ops);
 	} else {
+		fields = XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL;
 		agi->agi_free_root = cpu_to_be32(afake->af_root);
 		agi->agi_free_level = cpu_to_be32(afake->af_levels);
-		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREE_ROOT |
-					     XFS_AGI_FREE_LEVEL);
+		if (xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) {
+			agi->agi_fblocks = cpu_to_be32(afake->af_blocks);
+			fields |= XFS_AGI_IBLOCKS;
+		}
+		xfs_ialloc_log_agi(tp, agbp, fields);
 		xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_finobt_ops);
 	}
 }


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 4/9] xfs_db: support displaying inode btree block counts in AGI header
  2020-10-26 23:33 [PATCH v4 0/9] xfsprogs: add a inode btree blocks counts to the AGI header Darrick J. Wong
                   ` (2 preceding siblings ...)
  2020-10-26 23:33 ` [PATCH 3/9] xfs: support inode btree blockcounts in online repair Darrick J. Wong
@ 2020-10-26 23:33 ` Darrick J. Wong
  2020-10-28 17:28   ` Brian Foster
  2020-10-26 23:33 ` [PATCH 5/9] xfs_db: add inobtcnt upgrade path Darrick J. Wong
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-26 23:33 UTC (permalink / raw)
  To: sandeen, darrick.wong, bfoster; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Fix up xfs_db to support displaying the btree block counts.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 db/agi.c |    2 ++
 db/sb.c  |    2 ++
 2 files changed, 4 insertions(+)


diff --git a/db/agi.c b/db/agi.c
index bf21b2d40f04..cfb4f7b8528a 100644
--- a/db/agi.c
+++ b/db/agi.c
@@ -48,6 +48,8 @@ const field_t	agi_flds[] = {
 	{ "lsn", FLDT_UINT64X, OI(OFF(lsn)), C1, 0, TYP_NONE },
 	{ "free_root", FLDT_AGBLOCK, OI(OFF(free_root)), C1, 0, TYP_FINOBT },
 	{ "free_level", FLDT_UINT32D, OI(OFF(free_level)), C1, 0, TYP_NONE },
+	{ "ino_blocks", FLDT_UINT32D, OI(OFF(iblocks)), C1, 0, TYP_NONE },
+	{ "fino_blocks", FLDT_UINT32D, OI(OFF(fblocks)), C1, 0, TYP_NONE },
 	{ NULL }
 };
 
diff --git a/db/sb.c b/db/sb.c
index 8a303422b427..e3b1fe0b2e6e 100644
--- a/db/sb.c
+++ b/db/sb.c
@@ -687,6 +687,8 @@ version_string(
 		strcat(s, ",RMAPBT");
 	if (xfs_sb_version_hasreflink(sbp))
 		strcat(s, ",REFLINK");
+	if (xfs_sb_version_hasinobtcounts(sbp))
+		strcat(s, ",INOBTCNT");
 	return s;
 }
 


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 5/9] xfs_db: add inobtcnt upgrade path
  2020-10-26 23:33 [PATCH v4 0/9] xfsprogs: add a inode btree blocks counts to the AGI header Darrick J. Wong
                   ` (3 preceding siblings ...)
  2020-10-26 23:33 ` [PATCH 4/9] xfs_db: support displaying inode btree block counts in AGI header Darrick J. Wong
@ 2020-10-26 23:33 ` Darrick J. Wong
  2020-10-28 17:29   ` Brian Foster
  2020-11-16 21:13   ` [PATCH v2 " Darrick J. Wong
  2020-10-26 23:33 ` [PATCH 6/9] xfs_repair: check inode btree block counters in AGI Darrick J. Wong
                   ` (3 subsequent siblings)
  8 siblings, 2 replies; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-26 23:33 UTC (permalink / raw)
  To: sandeen, darrick.wong, bfoster; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Enable users to upgrade their filesystems to support inode btree block
counters.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 db/sb.c              |   76 +++++++++++++++++++++++++++++++++++++++++++++++++-
 db/xfs_admin.sh      |    4 ++-
 man/man8/xfs_admin.8 |   16 +++++++++++
 3 files changed, 94 insertions(+), 2 deletions(-)


diff --git a/db/sb.c b/db/sb.c
index e3b1fe0b2e6e..b1033e5ef7f0 100644
--- a/db/sb.c
+++ b/db/sb.c
@@ -620,6 +620,44 @@ do_version(xfs_agnumber_t agno, uint16_t version, uint32_t features)
 	return 1;
 }
 
+/* Add new V5 features to the filesystem. */
+static bool
+add_v5_features(
+	struct xfs_mount	*mp,
+	uint32_t		compat,
+	uint32_t		ro_compat,
+	uint32_t		incompat,
+	uint32_t		log_incompat)
+{
+	struct xfs_sb		tsb;
+	xfs_agnumber_t		agno;
+
+	dbprintf(_("Upgrading V5 filesystem\n"));
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		if (!get_sb(agno, &tsb))
+			break;
+
+		tsb.sb_features_compat |= compat;
+		tsb.sb_features_ro_compat |= ro_compat;
+		tsb.sb_features_incompat |= incompat;
+		tsb.sb_features_log_incompat |= log_incompat;
+		libxfs_sb_to_disk(iocur_top->data, &tsb);
+		write_cur();
+	}
+
+	if (agno != mp->m_sb.sb_agcount) {
+		dbprintf(
+_("Failed to upgrade V5 filesystem AG %d\n"), agno);
+		return false;
+	}
+
+	mp->m_sb.sb_features_compat |= compat;
+	mp->m_sb.sb_features_ro_compat |= ro_compat;
+	mp->m_sb.sb_features_incompat |= incompat;
+	mp->m_sb.sb_features_log_incompat |= log_incompat;
+	return true;
+}
+
 static char *
 version_string(
 	xfs_sb_t	*sbp)
@@ -705,6 +743,10 @@ version_f(
 {
 	uint16_t	version = 0;
 	uint32_t	features = 0;
+	uint32_t	upgrade_compat = 0;
+	uint32_t	upgrade_ro_compat = 0;
+	uint32_t	upgrade_incompat = 0;
+	uint32_t	upgrade_log_incompat = 0;
 	xfs_agnumber_t	ag;
 
 	if (argc == 2) {	/* WRITE VERSION */
@@ -716,7 +758,28 @@ version_f(
 		}
 
 		/* Logic here derived from the IRIX xfs_chver(1M) script. */
-		if (!strcasecmp(argv[1], "extflg")) {
+		if (!strcasecmp(argv[1], "inobtcount")) {
+			if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+				dbprintf(
+		_("inode btree counter feature is already enabled\n"));
+				exitcode = 1;
+				return 1;
+			}
+			if (!xfs_sb_version_hasfinobt(&mp->m_sb)) {
+				dbprintf(
+		_("inode btree counter feature cannot be enabled on filesystems lacking free inode btrees\n"));
+				exitcode = 1;
+				return 1;
+			}
+			if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+				dbprintf(
+		_("inode btree counter feature cannot be enabled on pre-V5 filesystems\n"));
+				exitcode = 1;
+				return 1;
+			}
+
+			upgrade_ro_compat |= XFS_SB_FEAT_RO_COMPAT_INOBTCNT;
+		} else if (!strcasecmp(argv[1], "extflg")) {
 			switch (XFS_SB_VERSION_NUM(&mp->m_sb)) {
 			case XFS_SB_VERSION_1:
 				version = 0x0004 | XFS_SB_VERSION_EXTFLGBIT;
@@ -807,6 +870,17 @@ version_f(
 			mp->m_sb.sb_versionnum = version;
 			mp->m_sb.sb_features2 = features;
 		}
+
+		if (upgrade_compat || upgrade_ro_compat || upgrade_incompat ||
+		    upgrade_log_incompat) {
+			if (!add_v5_features(mp, upgrade_compat,
+					upgrade_ro_compat,
+					upgrade_incompat,
+					upgrade_log_incompat)) {
+				exitcode = 1;
+				return 1;
+			}
+		}
 	}
 
 	if (argc == 3) {	/* VERSIONNUM + FEATURES2 */
diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh
index bd325da2f776..0f0c8d18d6cb 100755
--- a/db/xfs_admin.sh
+++ b/db/xfs_admin.sh
@@ -9,7 +9,7 @@ DB_OPTS=""
 REPAIR_OPTS=""
 USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] device [logdev]"
 
-while getopts "efjlpuc:L:U:V" c
+while getopts "efjlpuc:L:O:U:V" c
 do
 	case $c in
 	c)	REPAIR_OPTS=$REPAIR_OPTS" -c lazycount="$OPTARG;;
@@ -19,6 +19,8 @@ do
 	l)	DB_OPTS=$DB_OPTS" -r -c label";;
 	L)	DB_OPTS=$DB_OPTS" -c 'label "$OPTARG"'";;
 	p)	DB_OPTS=$DB_OPTS" -c 'version projid32bit'";;
+	O)	DB_OPTS=$DB_OPTS" -c 'version "$OPTARG"'";
+		REPAIR_OPTS="$REPAIR_OPTS ";;
 	u)	DB_OPTS=$DB_OPTS" -r -c uuid";;
 	U)	DB_OPTS=$DB_OPTS" -c 'uuid "$OPTARG"'";;
 	V)	xfs_db -p xfs_admin -V
diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
index 8afc873fb50a..65ca6afc1e12 100644
--- a/man/man8/xfs_admin.8
+++ b/man/man8/xfs_admin.8
@@ -6,6 +6,8 @@ xfs_admin \- change parameters of an XFS filesystem
 [
 .B \-eflpu
 ] [
+.BR \-O " feature"
+] [
 .BR "\-c 0" | 1
 ] [
 .B \-L
@@ -103,6 +105,20 @@ The filesystem label can be cleared using the special "\c
 " value for
 .IR label .
 .TP
+.BI \-O " feature"
+Add a new feature to the filesystem.
+Only one feature can be specified at a time.
+Features are as follows:
+.RS 0.7i
+.TP
+.B inobtcount
+Upgrade the filesystem to support the inode btree counters feature.
+This reduces mount time by caching the size of the inode btrees in the
+allocation group metadata.
+Once enabled, the filesystem will not be writable by older kernels.
+The filesystem cannot be downgraded after this feature is enabled.
+.RE
+.TP
 .BI \-U " uuid"
 Set the UUID of the filesystem to
 .IR uuid .


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 6/9] xfs_repair: check inode btree block counters in AGI
  2020-10-26 23:33 [PATCH v4 0/9] xfsprogs: add a inode btree blocks counts to the AGI header Darrick J. Wong
                   ` (4 preceding siblings ...)
  2020-10-26 23:33 ` [PATCH 5/9] xfs_db: add inobtcnt upgrade path Darrick J. Wong
@ 2020-10-26 23:33 ` Darrick J. Wong
  2020-10-28 17:29   ` Brian Foster
  2020-11-16 17:19   ` [PATCH v2 " Darrick J. Wong
  2020-10-26 23:33 ` [PATCH 7/9] xfs_repair: regenerate " Darrick J. Wong
                   ` (2 subsequent siblings)
  8 siblings, 2 replies; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-26 23:33 UTC (permalink / raw)
  To: sandeen, darrick.wong, bfoster; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Make sure that both inode btree block counters in the AGI are correct.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/scan.c |   38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)


diff --git a/repair/scan.c b/repair/scan.c
index 2a38ae5197c6..c826af7dee86 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -1948,6 +1948,12 @@ _("invalid inode count, inode chunk %d/%u, count %d ninodes %d\n"),
 	return suspect;
 }
 
+struct ino_priv {
+	struct aghdr_cnts	*agcnts;
+	uint32_t		ino_blocks;
+	uint32_t		fino_blocks;
+};
+
 /*
  * this one walks the inode btrees sucking the info there into
  * the incore avl tree.  We try and rescue corrupted btree records
@@ -1976,7 +1982,8 @@ scan_inobt(
 	void			*priv,
 	const struct xfs_buf_ops *ops)
 {
-	struct aghdr_cnts	*agcnts = priv;
+	struct ino_priv		*ipriv = priv;
+	struct aghdr_cnts	*agcnts = ipriv->agcnts;
 	char			*name;
 	xfs_agino_t		lastino = 0;
 	int			i;
@@ -2022,6 +2029,17 @@ scan_inobt(
 			return;
 	}
 
+	switch (magic) {
+	case XFS_FIBT_MAGIC:
+	case XFS_FIBT_CRC_MAGIC:
+		ipriv->fino_blocks++;
+		break;
+	case XFS_IBT_MAGIC:
+	case XFS_IBT_CRC_MAGIC:
+		ipriv->ino_blocks++;
+		break;
+	}
+
 	/*
 	 * check for btree blocks multiply claimed, any unknown/free state
 	 * is ok in the bitmap block.
@@ -2363,6 +2381,9 @@ validate_agi(
 	xfs_agnumber_t		agno,
 	struct aghdr_cnts	*agcnts)
 {
+	struct ino_priv		priv = {
+		.agcnts = agcnts,
+	};
 	xfs_agblock_t		bno;
 	int			i;
 	uint32_t		magic;
@@ -2372,7 +2393,7 @@ validate_agi(
 		magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_IBT_CRC_MAGIC
 							 : XFS_IBT_MAGIC;
 		scan_sbtree(bno, be32_to_cpu(agi->agi_level),
-			    agno, 0, scan_inobt, 1, magic, agcnts,
+			    agno, 0, scan_inobt, 1, magic, &priv,
 			    &xfs_inobt_buf_ops);
 	} else {
 		do_warn(_("bad agbno %u for inobt root, agno %d\n"),
@@ -2385,7 +2406,7 @@ validate_agi(
 			magic = xfs_sb_version_hascrc(&mp->m_sb) ?
 					XFS_FIBT_CRC_MAGIC : XFS_FIBT_MAGIC;
 			scan_sbtree(bno, be32_to_cpu(agi->agi_free_level),
-				    agno, 0, scan_inobt, 1, magic, agcnts,
+				    agno, 0, scan_inobt, 1, magic, &priv,
 				    &xfs_finobt_buf_ops);
 		} else {
 			do_warn(_("bad agbno %u for finobt root, agno %d\n"),
@@ -2393,6 +2414,17 @@ validate_agi(
 		}
 	}
 
+	if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+		if (be32_to_cpu(agi->agi_iblocks) != priv.ino_blocks)
+			do_warn(_("bad inobt block count %u, saw %u\n"),
+					priv.ino_blocks,
+					be32_to_cpu(agi->agi_iblocks));
+		if (be32_to_cpu(agi->agi_fblocks) != priv.fino_blocks)
+			do_warn(_("bad finobt block count %u, saw %u\n"),
+					priv.fino_blocks,
+					be32_to_cpu(agi->agi_fblocks));
+	}
+
 	if (be32_to_cpu(agi->agi_count) != agcnts->agicount) {
 		do_warn(_("agi_count %u, counted %u in ag %u\n"),
 			 be32_to_cpu(agi->agi_count), agcnts->agicount, agno);


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 7/9] xfs_repair: regenerate inode btree block counters in AGI
  2020-10-26 23:33 [PATCH v4 0/9] xfsprogs: add a inode btree blocks counts to the AGI header Darrick J. Wong
                   ` (5 preceding siblings ...)
  2020-10-26 23:33 ` [PATCH 6/9] xfs_repair: check inode btree block counters in AGI Darrick J. Wong
@ 2020-10-26 23:33 ` Darrick J. Wong
  2020-10-28 17:30   ` Brian Foster
  2020-10-26 23:33 ` [PATCH 8/9] mkfs: enable the inode btree counter feature Darrick J. Wong
  2020-10-26 23:34 ` [PATCH 9/9] xfs: enable new inode btree counters feature Darrick J. Wong
  8 siblings, 1 reply; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-26 23:33 UTC (permalink / raw)
  To: sandeen, darrick.wong, bfoster; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Reset both inode btree block counters in the AGI when rebuilding the
metadata indexes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 repair/phase5.c |    5 +++++
 1 file changed, 5 insertions(+)


diff --git a/repair/phase5.c b/repair/phase5.c
index 446f7ec0a1db..b97d23809f3c 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -172,6 +172,11 @@ build_agi(
 				cpu_to_be32(btr_fino->newbt.afake.af_levels);
 	}
 
+	if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+		agi->agi_iblocks = cpu_to_be32(btr_ino->newbt.afake.af_blocks);
+		agi->agi_fblocks = cpu_to_be32(btr_fino->newbt.afake.af_blocks);
+	}
+
 	libxfs_buf_mark_dirty(agi_buf);
 	libxfs_buf_relse(agi_buf);
 }


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 8/9] mkfs: enable the inode btree counter feature
  2020-10-26 23:33 [PATCH v4 0/9] xfsprogs: add a inode btree blocks counts to the AGI header Darrick J. Wong
                   ` (6 preceding siblings ...)
  2020-10-26 23:33 ` [PATCH 7/9] xfs_repair: regenerate " Darrick J. Wong
@ 2020-10-26 23:33 ` Darrick J. Wong
  2020-10-28 17:30   ` Brian Foster
  2020-10-26 23:34 ` [PATCH 9/9] xfs: enable new inode btree counters feature Darrick J. Wong
  8 siblings, 1 reply; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-26 23:33 UTC (permalink / raw)
  To: sandeen, darrick.wong, bfoster; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Teach mkfs how to enable the inode btree counter feature.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 man/man8/mkfs.xfs.8 |   15 +++++++++++++++
 mkfs/xfs_mkfs.c     |   34 +++++++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 1 deletion(-)


diff --git a/man/man8/mkfs.xfs.8 b/man/man8/mkfs.xfs.8
index 0a7858748457..1a6a5f93f0ea 100644
--- a/man/man8/mkfs.xfs.8
+++ b/man/man8/mkfs.xfs.8
@@ -184,6 +184,21 @@ option set. When the option
 .B \-m crc=0
 is used, the free inode btree feature is not supported and is disabled.
 .TP
+.BI inobtcount= value
+This option causes the filesystem to record the number of blocks used by
+the inode btree and the free inode btree.
+This can be used to reduce mount times when the free inode btree is enabled.
+.IP
+By default,
+.B mkfs.xfs
+will not enable this option.
+This feature is only available for filesystems created with the (default)
+.B \-m finobt=1
+option set.
+When the option
+.B \-m finobt=0
+is used, the inode btree counter feature is not supported and is disabled.
+.TP
 .BI uuid= value
 Use the given value as the filesystem UUID for the newly created filesystem.
 The default is to generate a random UUID.
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index 908d520df909..23e3077e0174 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -119,6 +119,7 @@ enum {
 	M_UUID,
 	M_RMAPBT,
 	M_REFLINK,
+	M_INOBTCNT,
 	M_MAX_OPTS,
 };
 
@@ -660,6 +661,7 @@ static struct opt_params mopts = {
 		[M_UUID] = "uuid",
 		[M_RMAPBT] = "rmapbt",
 		[M_REFLINK] = "reflink",
+		[M_INOBTCNT] = "inobtcount",
 	},
 	.subopt_params = {
 		{ .index = M_CRC,
@@ -690,6 +692,12 @@ static struct opt_params mopts = {
 		  .maxval = 1,
 		  .defaultval = 1,
 		},
+		{ .index = M_INOBTCNT,
+		  .conflicts = { { NULL, LAST_CONFLICT } },
+		  .minval = 0,
+		  .maxval = 1,
+		  .defaultval = 1,
+		},
 	},
 };
 
@@ -740,6 +748,7 @@ struct sb_feat_args {
 	bool	spinodes;		/* XFS_SB_FEAT_INCOMPAT_SPINODES */
 	bool	rmapbt;			/* XFS_SB_FEAT_RO_COMPAT_RMAPBT */
 	bool	reflink;		/* XFS_SB_FEAT_RO_COMPAT_REFLINK */
+	bool	inobtcnt;		/* XFS_SB_FEAT_RO_COMPAT_INOBTCNT */
 	bool	nodalign;
 	bool	nortalign;
 };
@@ -862,7 +871,8 @@ usage( void )
 {
 	fprintf(stderr, _("Usage: %s\n\
 /* blocksize */		[-b size=num]\n\
-/* metadata */		[-m crc=0|1,finobt=0|1,uuid=xxx,rmapbt=0|1,reflink=0|1]\n\
+/* metadata */		[-m crc=0|1,finobt=0|1,uuid=xxx,rmapbt=0|1,reflink=0|1,\n\
+			    inobtcnt=0|1]\n\
 /* data subvol */	[-d agcount=n,agsize=n,file,name=xxx,size=num,\n\
 			    (sunit=value,swidth=value|su=num,sw=num|noalign),\n\
 			    sectsize=num\n\
@@ -1565,6 +1575,9 @@ meta_opts_parser(
 	case M_REFLINK:
 		cli->sb_feat.reflink = getnum(value, opts, subopt);
 		break;
+	case M_INOBTCNT:
+		cli->sb_feat.inobtcnt = getnum(value, opts, subopt);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1988,6 +2001,22 @@ _("reflink not supported without CRC support\n"));
 			usage();
 		}
 		cli->sb_feat.reflink = false;
+
+		if (cli->sb_feat.inobtcnt && cli_opt_set(&mopts, M_INOBTCNT)) {
+			fprintf(stderr,
+_("inode btree counters not supported without CRC support\n"));
+			usage();
+		}
+		cli->sb_feat.inobtcnt = false;
+	}
+
+	if (!cli->sb_feat.finobt) {
+		if (cli->sb_feat.inobtcnt && cli_opt_set(&mopts, M_INOBTCNT)) {
+			fprintf(stderr,
+_("inode btree counters not supported without finobt support\n"));
+			usage();
+		}
+		cli->sb_feat.inobtcnt = false;
 	}
 
 	if ((cli->fsx.fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
@@ -2955,6 +2984,8 @@ sb_set_features(
 		sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_RMAPBT;
 	if (fp->reflink)
 		sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_REFLINK;
+	if (fp->inobtcnt)
+		sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_INOBTCNT;
 
 	/*
 	 * Sparse inode chunk support has two main inode alignment requirements.
@@ -3617,6 +3648,7 @@ main(
 			.spinodes = true,
 			.rmapbt = false,
 			.reflink = true,
+			.inobtcnt = false,
 			.parent_pointers = false,
 			.nodalign = false,
 			.nortalign = false,


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 9/9] xfs: enable new inode btree counters feature
  2020-10-26 23:33 [PATCH v4 0/9] xfsprogs: add a inode btree blocks counts to the AGI header Darrick J. Wong
                   ` (7 preceding siblings ...)
  2020-10-26 23:33 ` [PATCH 8/9] mkfs: enable the inode btree counter feature Darrick J. Wong
@ 2020-10-26 23:34 ` Darrick J. Wong
  8 siblings, 0 replies; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-26 23:34 UTC (permalink / raw)
  To: sandeen, darrick.wong, bfoster; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Source kernel commit: b896a39faa5a2f97dadfb347928466afb12cc63a

Enable the new inode btree counters feature.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libxfs/xfs_format.h |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


diff --git a/libxfs/xfs_format.h b/libxfs/xfs_format.h
index 01544f9d5a57..5a183b6658e6 100644
--- a/libxfs/xfs_format.h
+++ b/libxfs/xfs_format.h
@@ -453,7 +453,8 @@ xfs_sb_has_compat_feature(
 #define XFS_SB_FEAT_RO_COMPAT_ALL \
 		(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
 		 XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
-		 XFS_SB_FEAT_RO_COMPAT_REFLINK)
+		 XFS_SB_FEAT_RO_COMPAT_REFLINK| \
+		 XFS_SB_FEAT_RO_COMPAT_INOBTCNT)
 #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN	~XFS_SB_FEAT_RO_COMPAT_ALL
 static inline bool
 xfs_sb_has_ro_compat_feature(


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/9] xfs_db: support displaying inode btree block counts in AGI header
  2020-10-26 23:33 ` [PATCH 4/9] xfs_db: support displaying inode btree block counts in AGI header Darrick J. Wong
@ 2020-10-28 17:28   ` Brian Foster
  0 siblings, 0 replies; 26+ messages in thread
From: Brian Foster @ 2020-10-28 17:28 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Mon, Oct 26, 2020 at 04:33:31PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Fix up xfs_db to support displaying the btree block counts.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  db/agi.c |    2 ++
>  db/sb.c  |    2 ++
>  2 files changed, 4 insertions(+)
> 
> 
> diff --git a/db/agi.c b/db/agi.c
> index bf21b2d40f04..cfb4f7b8528a 100644
> --- a/db/agi.c
> +++ b/db/agi.c
> @@ -48,6 +48,8 @@ const field_t	agi_flds[] = {
>  	{ "lsn", FLDT_UINT64X, OI(OFF(lsn)), C1, 0, TYP_NONE },
>  	{ "free_root", FLDT_AGBLOCK, OI(OFF(free_root)), C1, 0, TYP_FINOBT },
>  	{ "free_level", FLDT_UINT32D, OI(OFF(free_level)), C1, 0, TYP_NONE },
> +	{ "ino_blocks", FLDT_UINT32D, OI(OFF(iblocks)), C1, 0, TYP_NONE },
> +	{ "fino_blocks", FLDT_UINT32D, OI(OFF(fblocks)), C1, 0, TYP_NONE },
>  	{ NULL }
>  };
>  
> diff --git a/db/sb.c b/db/sb.c
> index 8a303422b427..e3b1fe0b2e6e 100644
> --- a/db/sb.c
> +++ b/db/sb.c
> @@ -687,6 +687,8 @@ version_string(
>  		strcat(s, ",RMAPBT");
>  	if (xfs_sb_version_hasreflink(sbp))
>  		strcat(s, ",REFLINK");
> +	if (xfs_sb_version_hasinobtcounts(sbp))
> +		strcat(s, ",INOBTCNT");
>  	return s;
>  }
>  
> 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 5/9] xfs_db: add inobtcnt upgrade path
  2020-10-26 23:33 ` [PATCH 5/9] xfs_db: add inobtcnt upgrade path Darrick J. Wong
@ 2020-10-28 17:29   ` Brian Foster
  2020-10-29  0:03     ` Darrick J. Wong
  2020-11-16 21:13   ` [PATCH v2 " Darrick J. Wong
  1 sibling, 1 reply; 26+ messages in thread
From: Brian Foster @ 2020-10-28 17:29 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Mon, Oct 26, 2020 at 04:33:38PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Enable users to upgrade their filesystems to support inode btree block
> counters.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  db/sb.c              |   76 +++++++++++++++++++++++++++++++++++++++++++++++++-
>  db/xfs_admin.sh      |    4 ++-
>  man/man8/xfs_admin.8 |   16 +++++++++++
>  3 files changed, 94 insertions(+), 2 deletions(-)
> 
> 
> diff --git a/db/sb.c b/db/sb.c
> index e3b1fe0b2e6e..b1033e5ef7f0 100644
> --- a/db/sb.c
> +++ b/db/sb.c
> @@ -620,6 +620,44 @@ do_version(xfs_agnumber_t agno, uint16_t version, uint32_t features)
>  	return 1;
>  }
>  
> +/* Add new V5 features to the filesystem. */
> +static bool
> +add_v5_features(
> +	struct xfs_mount	*mp,
> +	uint32_t		compat,
> +	uint32_t		ro_compat,
> +	uint32_t		incompat,
> +	uint32_t		log_incompat)
> +{
> +	struct xfs_sb		tsb;
> +	xfs_agnumber_t		agno;
> +
> +	dbprintf(_("Upgrading V5 filesystem\n"));
> +	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> +		if (!get_sb(agno, &tsb))
> +			break;
> +
> +		tsb.sb_features_compat |= compat;
> +		tsb.sb_features_ro_compat |= ro_compat;
> +		tsb.sb_features_incompat |= incompat;
> +		tsb.sb_features_log_incompat |= log_incompat;
> +		libxfs_sb_to_disk(iocur_top->data, &tsb);
> +		write_cur();
> +	}
> +
> +	if (agno != mp->m_sb.sb_agcount) {
> +		dbprintf(
> +_("Failed to upgrade V5 filesystem AG %d\n"), agno);
> +		return false;

Do we need to undo changes if this somehow occurs?

> +	}
> +
> +	mp->m_sb.sb_features_compat |= compat;
> +	mp->m_sb.sb_features_ro_compat |= ro_compat;
> +	mp->m_sb.sb_features_incompat |= incompat;
> +	mp->m_sb.sb_features_log_incompat |= log_incompat;
> +	return true;
> +}
> +
>  static char *
>  version_string(
>  	xfs_sb_t	*sbp)
> @@ -705,6 +743,10 @@ version_f(

The comment above version_f() needs an update if we start to support v5
features.

>  {
>  	uint16_t	version = 0;
>  	uint32_t	features = 0;
> +	uint32_t	upgrade_compat = 0;
> +	uint32_t	upgrade_ro_compat = 0;
> +	uint32_t	upgrade_incompat = 0;
> +	uint32_t	upgrade_log_incompat = 0;
>  	xfs_agnumber_t	ag;
>  
>  	if (argc == 2) {	/* WRITE VERSION */
> @@ -716,7 +758,28 @@ version_f(
>  		}
>  
>  		/* Logic here derived from the IRIX xfs_chver(1M) script. */
> -		if (!strcasecmp(argv[1], "extflg")) {
> +		if (!strcasecmp(argv[1], "inobtcount")) {
> +			if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
> +				dbprintf(
> +		_("inode btree counter feature is already enabled\n"));
> +				exitcode = 1;
> +				return 1;
> +			}
> +			if (!xfs_sb_version_hasfinobt(&mp->m_sb)) {
> +				dbprintf(
> +		_("inode btree counter feature cannot be enabled on filesystems lacking free inode btrees\n"));
> +				exitcode = 1;
> +				return 1;
> +			}
> +			if (!xfs_sb_version_hascrc(&mp->m_sb)) {
> +				dbprintf(
> +		_("inode btree counter feature cannot be enabled on pre-V5 filesystems\n"));
> +				exitcode = 1;
> +				return 1;
> +			}
> +
> +			upgrade_ro_compat |= XFS_SB_FEAT_RO_COMPAT_INOBTCNT;
> +		} else if (!strcasecmp(argv[1], "extflg")) {
>  			switch (XFS_SB_VERSION_NUM(&mp->m_sb)) {
>  			case XFS_SB_VERSION_1:
>  				version = 0x0004 | XFS_SB_VERSION_EXTFLGBIT;
> @@ -807,6 +870,17 @@ version_f(
>  			mp->m_sb.sb_versionnum = version;
>  			mp->m_sb.sb_features2 = features;
>  		}
> +
> +		if (upgrade_compat || upgrade_ro_compat || upgrade_incompat ||
> +		    upgrade_log_incompat) {
> +			if (!add_v5_features(mp, upgrade_compat,
> +					upgrade_ro_compat,
> +					upgrade_incompat,
> +					upgrade_log_incompat)) {
> +				exitcode = 1;
> +				return 1;
> +			}
> +		}

What's the purpose of the unused upgrade variables?

Also, it looks like we just update the feature bits here. What about the
counters? Is the user expected to run xfs_repair?

>  	}
>  
>  	if (argc == 3) {	/* VERSIONNUM + FEATURES2 */
> diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh
> index bd325da2f776..0f0c8d18d6cb 100755
> --- a/db/xfs_admin.sh
> +++ b/db/xfs_admin.sh
> @@ -9,7 +9,7 @@ DB_OPTS=""
>  REPAIR_OPTS=""
>  USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] device [logdev]"
>  
> -while getopts "efjlpuc:L:U:V" c
> +while getopts "efjlpuc:L:O:U:V" c
>  do
>  	case $c in
>  	c)	REPAIR_OPTS=$REPAIR_OPTS" -c lazycount="$OPTARG;;
> @@ -19,6 +19,8 @@ do
>  	l)	DB_OPTS=$DB_OPTS" -r -c label";;
>  	L)	DB_OPTS=$DB_OPTS" -c 'label "$OPTARG"'";;
>  	p)	DB_OPTS=$DB_OPTS" -c 'version projid32bit'";;
> +	O)	DB_OPTS=$DB_OPTS" -c 'version "$OPTARG"'";
> +		REPAIR_OPTS="$REPAIR_OPTS ";;

Ah, I see.. xfs_admin runs repair if options are specified, hence this
little whitespace hack. It might be worth a comment here so somebody
doesn't fly by and "clean" that up. ;)

BTW, does this also address the error scenario I asked about above...?

>  	u)	DB_OPTS=$DB_OPTS" -r -c uuid";;
>  	U)	DB_OPTS=$DB_OPTS" -c 'uuid "$OPTARG"'";;
>  	V)	xfs_db -p xfs_admin -V
> diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
> index 8afc873fb50a..65ca6afc1e12 100644
> --- a/man/man8/xfs_admin.8
> +++ b/man/man8/xfs_admin.8
> @@ -6,6 +6,8 @@ xfs_admin \- change parameters of an XFS filesystem
>  [
>  .B \-eflpu
>  ] [
> +.BR \-O " feature"
> +] [
>  .BR "\-c 0" | 1
>  ] [
>  .B \-L
> @@ -103,6 +105,20 @@ The filesystem label can be cleared using the special "\c
>  " value for
>  .IR label .
>  .TP
> +.BI \-O " feature"
> +Add a new feature to the filesystem.
> +Only one feature can be specified at a time.
> +Features are as follows:
> +.RS 0.7i
> +.TP
> +.B inobtcount
> +Upgrade the filesystem to support the inode btree counters feature.
> +This reduces mount time by caching the size of the inode btrees in the
> +allocation group metadata.
> +Once enabled, the filesystem will not be writable by older kernels.
> +The filesystem cannot be downgraded after this feature is enabled.

Any reason for not allowing the downgrade path? It seems like we're
mostly there implementation wise and that might facilitate enabling the
feature by default.

Brian

> +.RE
> +.TP
>  .BI \-U " uuid"
>  Set the UUID of the filesystem to
>  .IR uuid .
> 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 6/9] xfs_repair: check inode btree block counters in AGI
  2020-10-26 23:33 ` [PATCH 6/9] xfs_repair: check inode btree block counters in AGI Darrick J. Wong
@ 2020-10-28 17:29   ` Brian Foster
  2020-10-29  1:01     ` Darrick J. Wong
  2020-11-16 17:19   ` [PATCH v2 " Darrick J. Wong
  1 sibling, 1 reply; 26+ messages in thread
From: Brian Foster @ 2020-10-28 17:29 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Mon, Oct 26, 2020 at 04:33:44PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Make sure that both inode btree block counters in the AGI are correct.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  repair/scan.c |   38 +++++++++++++++++++++++++++++++++++---
>  1 file changed, 35 insertions(+), 3 deletions(-)
> 
> 
> diff --git a/repair/scan.c b/repair/scan.c
> index 2a38ae5197c6..c826af7dee86 100644
> --- a/repair/scan.c
> +++ b/repair/scan.c
...
> @@ -2022,6 +2029,17 @@ scan_inobt(
>  			return;
>  	}
>  
> +	switch (magic) {
> +	case XFS_FIBT_MAGIC:
> +	case XFS_FIBT_CRC_MAGIC:
> +		ipriv->fino_blocks++;
> +		break;
> +	case XFS_IBT_MAGIC:
> +	case XFS_IBT_CRC_MAGIC:
> +		ipriv->ino_blocks++;
> +		break;
> +	}
> +

Is this intentionally not folded into the earlier magic switch
statement?

>  	/*
>  	 * check for btree blocks multiply claimed, any unknown/free state
>  	 * is ok in the bitmap block.
...
> @@ -2393,6 +2414,17 @@ validate_agi(
>  		}
>  	}
>  
> +	if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
> +		if (be32_to_cpu(agi->agi_iblocks) != priv.ino_blocks)
> +			do_warn(_("bad inobt block count %u, saw %u\n"),
> +					priv.ino_blocks,
> +					be32_to_cpu(agi->agi_iblocks));

These two params are backwards (here and below), no?

Brian

> +		if (be32_to_cpu(agi->agi_fblocks) != priv.fino_blocks)
> +			do_warn(_("bad finobt block count %u, saw %u\n"),
> +					priv.fino_blocks,
> +					be32_to_cpu(agi->agi_fblocks));
> +	}
> +
>  	if (be32_to_cpu(agi->agi_count) != agcnts->agicount) {
>  		do_warn(_("agi_count %u, counted %u in ag %u\n"),
>  			 be32_to_cpu(agi->agi_count), agcnts->agicount, agno);
> 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 7/9] xfs_repair: regenerate inode btree block counters in AGI
  2020-10-26 23:33 ` [PATCH 7/9] xfs_repair: regenerate " Darrick J. Wong
@ 2020-10-28 17:30   ` Brian Foster
  0 siblings, 0 replies; 26+ messages in thread
From: Brian Foster @ 2020-10-28 17:30 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Mon, Oct 26, 2020 at 04:33:50PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Reset both inode btree block counters in the AGI when rebuilding the
> metadata indexes.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  repair/phase5.c |    5 +++++
>  1 file changed, 5 insertions(+)
> 
> 
> diff --git a/repair/phase5.c b/repair/phase5.c
> index 446f7ec0a1db..b97d23809f3c 100644
> --- a/repair/phase5.c
> +++ b/repair/phase5.c
> @@ -172,6 +172,11 @@ build_agi(
>  				cpu_to_be32(btr_fino->newbt.afake.af_levels);
>  	}
>  
> +	if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
> +		agi->agi_iblocks = cpu_to_be32(btr_ino->newbt.afake.af_blocks);
> +		agi->agi_fblocks = cpu_to_be32(btr_fino->newbt.afake.af_blocks);
> +	}
> +
>  	libxfs_buf_mark_dirty(agi_buf);
>  	libxfs_buf_relse(agi_buf);
>  }
> 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 8/9] mkfs: enable the inode btree counter feature
  2020-10-26 23:33 ` [PATCH 8/9] mkfs: enable the inode btree counter feature Darrick J. Wong
@ 2020-10-28 17:30   ` Brian Foster
  2020-10-29  1:02     ` Darrick J. Wong
  0 siblings, 1 reply; 26+ messages in thread
From: Brian Foster @ 2020-10-28 17:30 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Mon, Oct 26, 2020 at 04:33:56PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Teach mkfs how to enable the inode btree counter feature.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  man/man8/mkfs.xfs.8 |   15 +++++++++++++++
>  mkfs/xfs_mkfs.c     |   34 +++++++++++++++++++++++++++++++++-
>  2 files changed, 48 insertions(+), 1 deletion(-)
> 
> 
> diff --git a/man/man8/mkfs.xfs.8 b/man/man8/mkfs.xfs.8
> index 0a7858748457..1a6a5f93f0ea 100644
> --- a/man/man8/mkfs.xfs.8
> +++ b/man/man8/mkfs.xfs.8
...
> @@ -862,7 +871,8 @@ usage( void )
>  {
>  	fprintf(stderr, _("Usage: %s\n\
>  /* blocksize */		[-b size=num]\n\
> -/* metadata */		[-m crc=0|1,finobt=0|1,uuid=xxx,rmapbt=0|1,reflink=0|1]\n\
> +/* metadata */		[-m crc=0|1,finobt=0|1,uuid=xxx,rmapbt=0|1,reflink=0|1,\n\
> +			    inobtcnt=0|1]\n\
>  /* data subvol */	[-d agcount=n,agsize=n,file,name=xxx,size=num,\n\
>  			    (sunit=value,swidth=value|su=num,sw=num|noalign),\n\
>  			    sectsize=num\n\

Any plans to add a geometry flag so the feature state is reported on
success? Otherwise LGTM:

Reviewed-by: Brian Foster <bfoster@redhat.com>

> @@ -1565,6 +1575,9 @@ meta_opts_parser(
>  	case M_REFLINK:
>  		cli->sb_feat.reflink = getnum(value, opts, subopt);
>  		break;
> +	case M_INOBTCNT:
> +		cli->sb_feat.inobtcnt = getnum(value, opts, subopt);
> +		break;
>  	default:
>  		return -EINVAL;
>  	}
> @@ -1988,6 +2001,22 @@ _("reflink not supported without CRC support\n"));
>  			usage();
>  		}
>  		cli->sb_feat.reflink = false;
> +
> +		if (cli->sb_feat.inobtcnt && cli_opt_set(&mopts, M_INOBTCNT)) {
> +			fprintf(stderr,
> +_("inode btree counters not supported without CRC support\n"));
> +			usage();
> +		}
> +		cli->sb_feat.inobtcnt = false;
> +	}
> +
> +	if (!cli->sb_feat.finobt) {
> +		if (cli->sb_feat.inobtcnt && cli_opt_set(&mopts, M_INOBTCNT)) {
> +			fprintf(stderr,
> +_("inode btree counters not supported without finobt support\n"));
> +			usage();
> +		}
> +		cli->sb_feat.inobtcnt = false;
>  	}
>  
>  	if ((cli->fsx.fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
> @@ -2955,6 +2984,8 @@ sb_set_features(
>  		sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_RMAPBT;
>  	if (fp->reflink)
>  		sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_REFLINK;
> +	if (fp->inobtcnt)
> +		sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_INOBTCNT;
>  
>  	/*
>  	 * Sparse inode chunk support has two main inode alignment requirements.
> @@ -3617,6 +3648,7 @@ main(
>  			.spinodes = true,
>  			.rmapbt = false,
>  			.reflink = true,
> +			.inobtcnt = false,
>  			.parent_pointers = false,
>  			.nodalign = false,
>  			.nortalign = false,
> 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 5/9] xfs_db: add inobtcnt upgrade path
  2020-10-28 17:29   ` Brian Foster
@ 2020-10-29  0:03     ` Darrick J. Wong
  2020-10-29 12:09       ` Brian Foster
  0 siblings, 1 reply; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-29  0:03 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Wed, Oct 28, 2020 at 01:29:25PM -0400, Brian Foster wrote:
> On Mon, Oct 26, 2020 at 04:33:38PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Enable users to upgrade their filesystems to support inode btree block
> > counters.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  db/sb.c              |   76 +++++++++++++++++++++++++++++++++++++++++++++++++-
> >  db/xfs_admin.sh      |    4 ++-
> >  man/man8/xfs_admin.8 |   16 +++++++++++
> >  3 files changed, 94 insertions(+), 2 deletions(-)
> > 
> > 
> > diff --git a/db/sb.c b/db/sb.c
> > index e3b1fe0b2e6e..b1033e5ef7f0 100644
> > --- a/db/sb.c
> > +++ b/db/sb.c
> > @@ -620,6 +620,44 @@ do_version(xfs_agnumber_t agno, uint16_t version, uint32_t features)
> >  	return 1;
> >  }
> >  
> > +/* Add new V5 features to the filesystem. */
> > +static bool
> > +add_v5_features(
> > +	struct xfs_mount	*mp,
> > +	uint32_t		compat,
> > +	uint32_t		ro_compat,
> > +	uint32_t		incompat,
> > +	uint32_t		log_incompat)
> > +{
> > +	struct xfs_sb		tsb;
> > +	xfs_agnumber_t		agno;
> > +
> > +	dbprintf(_("Upgrading V5 filesystem\n"));
> > +	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> > +		if (!get_sb(agno, &tsb))
> > +			break;
> > +
> > +		tsb.sb_features_compat |= compat;
> > +		tsb.sb_features_ro_compat |= ro_compat;
> > +		tsb.sb_features_incompat |= incompat;
> > +		tsb.sb_features_log_incompat |= log_incompat;
> > +		libxfs_sb_to_disk(iocur_top->data, &tsb);
> > +		write_cur();
> > +	}
> > +
> > +	if (agno != mp->m_sb.sb_agcount) {
> > +		dbprintf(
> > +_("Failed to upgrade V5 filesystem AG %d\n"), agno);
> > +		return false;
> 
> Do we need to undo changes if this somehow occurs?

Not sure.  The superblocks are inconsistent now, but I guess we should
try to put them back the way they were.

> > +	}
> > +
> > +	mp->m_sb.sb_features_compat |= compat;
> > +	mp->m_sb.sb_features_ro_compat |= ro_compat;
> > +	mp->m_sb.sb_features_incompat |= incompat;
> > +	mp->m_sb.sb_features_log_incompat |= log_incompat;
> > +	return true;
> > +}
> > +
> >  static char *
> >  version_string(
> >  	xfs_sb_t	*sbp)
> > @@ -705,6 +743,10 @@ version_f(
> 
> The comment above version_f() needs an update if we start to support v5
> features.

Ok.

> >  {
> >  	uint16_t	version = 0;
> >  	uint32_t	features = 0;
> > +	uint32_t	upgrade_compat = 0;
> > +	uint32_t	upgrade_ro_compat = 0;
> > +	uint32_t	upgrade_incompat = 0;
> > +	uint32_t	upgrade_log_incompat = 0;
> >  	xfs_agnumber_t	ag;
> >  
> >  	if (argc == 2) {	/* WRITE VERSION */
> > @@ -716,7 +758,28 @@ version_f(
> >  		}
> >  
> >  		/* Logic here derived from the IRIX xfs_chver(1M) script. */
> > -		if (!strcasecmp(argv[1], "extflg")) {
> > +		if (!strcasecmp(argv[1], "inobtcount")) {
> > +			if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
> > +				dbprintf(
> > +		_("inode btree counter feature is already enabled\n"));
> > +				exitcode = 1;
> > +				return 1;
> > +			}
> > +			if (!xfs_sb_version_hasfinobt(&mp->m_sb)) {
> > +				dbprintf(
> > +		_("inode btree counter feature cannot be enabled on filesystems lacking free inode btrees\n"));
> > +				exitcode = 1;
> > +				return 1;
> > +			}
> > +			if (!xfs_sb_version_hascrc(&mp->m_sb)) {
> > +				dbprintf(
> > +		_("inode btree counter feature cannot be enabled on pre-V5 filesystems\n"));
> > +				exitcode = 1;
> > +				return 1;
> > +			}
> > +
> > +			upgrade_ro_compat |= XFS_SB_FEAT_RO_COMPAT_INOBTCNT;
> > +		} else if (!strcasecmp(argv[1], "extflg")) {
> >  			switch (XFS_SB_VERSION_NUM(&mp->m_sb)) {
> >  			case XFS_SB_VERSION_1:
> >  				version = 0x0004 | XFS_SB_VERSION_EXTFLGBIT;
> > @@ -807,6 +870,17 @@ version_f(
> >  			mp->m_sb.sb_versionnum = version;
> >  			mp->m_sb.sb_features2 = features;
> >  		}
> > +
> > +		if (upgrade_compat || upgrade_ro_compat || upgrade_incompat ||
> > +		    upgrade_log_incompat) {
> > +			if (!add_v5_features(mp, upgrade_compat,
> > +					upgrade_ro_compat,
> > +					upgrade_incompat,
> > +					upgrade_log_incompat)) {
> > +				exitcode = 1;
> > +				return 1;
> > +			}
> > +		}
> 
> What's the purpose of the unused upgrade variables?

I'm laying the groundwork for adding more features later, such as
bigtime and atomic log swapping.

> Also, it looks like we just update the feature bits here. What about the
> counters? Is the user expected to run xfs_repair?

Yes.  Come to think of it, I could set sb_inprogress and force the user
to run repair.

> 
> >  	}
> >  
> >  	if (argc == 3) {	/* VERSIONNUM + FEATURES2 */
> > diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh
> > index bd325da2f776..0f0c8d18d6cb 100755
> > --- a/db/xfs_admin.sh
> > +++ b/db/xfs_admin.sh
> > @@ -9,7 +9,7 @@ DB_OPTS=""
> >  REPAIR_OPTS=""
> >  USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] device [logdev]"
> >  
> > -while getopts "efjlpuc:L:U:V" c
> > +while getopts "efjlpuc:L:O:U:V" c
> >  do
> >  	case $c in
> >  	c)	REPAIR_OPTS=$REPAIR_OPTS" -c lazycount="$OPTARG;;
> > @@ -19,6 +19,8 @@ do
> >  	l)	DB_OPTS=$DB_OPTS" -r -c label";;
> >  	L)	DB_OPTS=$DB_OPTS" -c 'label "$OPTARG"'";;
> >  	p)	DB_OPTS=$DB_OPTS" -c 'version projid32bit'";;
> > +	O)	DB_OPTS=$DB_OPTS" -c 'version "$OPTARG"'";
> > +		REPAIR_OPTS="$REPAIR_OPTS ";;
> 
> Ah, I see.. xfs_admin runs repair if options are specified, hence this
> little whitespace hack. It might be worth a comment here so somebody
> doesn't fly by and "clean" that up. ;)

Ok.

> BTW, does this also address the error scenario I asked about above...?

Yes, but I think we should revert the primary super if the upgrade
fails.

> >  	u)	DB_OPTS=$DB_OPTS" -r -c uuid";;
> >  	U)	DB_OPTS=$DB_OPTS" -c 'uuid "$OPTARG"'";;
> >  	V)	xfs_db -p xfs_admin -V
> > diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
> > index 8afc873fb50a..65ca6afc1e12 100644
> > --- a/man/man8/xfs_admin.8
> > +++ b/man/man8/xfs_admin.8
> > @@ -6,6 +6,8 @@ xfs_admin \- change parameters of an XFS filesystem
> >  [
> >  .B \-eflpu
> >  ] [
> > +.BR \-O " feature"
> > +] [
> >  .BR "\-c 0" | 1
> >  ] [
> >  .B \-L
> > @@ -103,6 +105,20 @@ The filesystem label can be cleared using the special "\c
> >  " value for
> >  .IR label .
> >  .TP
> > +.BI \-O " feature"
> > +Add a new feature to the filesystem.
> > +Only one feature can be specified at a time.
> > +Features are as follows:
> > +.RS 0.7i
> > +.TP
> > +.B inobtcount
> > +Upgrade the filesystem to support the inode btree counters feature.
> > +This reduces mount time by caching the size of the inode btrees in the
> > +allocation group metadata.
> > +Once enabled, the filesystem will not be writable by older kernels.
> > +The filesystem cannot be downgraded after this feature is enabled.
> 
> Any reason for not allowing the downgrade path? It seems like we're
> mostly there implementation wise and that might facilitate enabling the
> feature by default.

Downgrading will not be easy for bigtime, since we'd have to scan the
whole fs to see if there are any timestamps past 2038.  For other
features it might not be such a big deal, but in general I don't want to
increase our testing burden even further.

I'll ask the ext4 folks if they know of people downgrading filesystems
with tune2fs, but AFAIK it's not generally done.

--D

> Brian
> 
> > +.RE
> > +.TP
> >  .BI \-U " uuid"
> >  Set the UUID of the filesystem to
> >  .IR uuid .
> > 
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 6/9] xfs_repair: check inode btree block counters in AGI
  2020-10-28 17:29   ` Brian Foster
@ 2020-10-29  1:01     ` Darrick J. Wong
  0 siblings, 0 replies; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-29  1:01 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Wed, Oct 28, 2020 at 01:29:59PM -0400, Brian Foster wrote:
> On Mon, Oct 26, 2020 at 04:33:44PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Make sure that both inode btree block counters in the AGI are correct.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  repair/scan.c |   38 +++++++++++++++++++++++++++++++++++---
> >  1 file changed, 35 insertions(+), 3 deletions(-)
> > 
> > 
> > diff --git a/repair/scan.c b/repair/scan.c
> > index 2a38ae5197c6..c826af7dee86 100644
> > --- a/repair/scan.c
> > +++ b/repair/scan.c
> ...
> > @@ -2022,6 +2029,17 @@ scan_inobt(
> >  			return;
> >  	}
> >  
> > +	switch (magic) {
> > +	case XFS_FIBT_MAGIC:
> > +	case XFS_FIBT_CRC_MAGIC:
> > +		ipriv->fino_blocks++;
> > +		break;
> > +	case XFS_IBT_MAGIC:
> > +	case XFS_IBT_CRC_MAGIC:
> > +		ipriv->ino_blocks++;
> > +		break;
> > +	}
> > +
> 
> Is this intentionally not folded into the earlier magic switch
> statement?

I'd originally thought that we wouldn't want to count the block if it's
obviously bad, but the tree and the agi counter are supposed to be
consistent with each other, so yes, we should always bump the counter
when we are pointed towards a block.

> >  	/*
> >  	 * check for btree blocks multiply claimed, any unknown/free state
> >  	 * is ok in the bitmap block.
> ...
> > @@ -2393,6 +2414,17 @@ validate_agi(
> >  		}
> >  	}
> >  
> > +	if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
> > +		if (be32_to_cpu(agi->agi_iblocks) != priv.ino_blocks)
> > +			do_warn(_("bad inobt block count %u, saw %u\n"),
> > +					priv.ino_blocks,
> > +					be32_to_cpu(agi->agi_iblocks));
> 
> These two params are backwards (here and below), no?

Oops.  Good catch!

--D

> Brian
> 
> > +		if (be32_to_cpu(agi->agi_fblocks) != priv.fino_blocks)
> > +			do_warn(_("bad finobt block count %u, saw %u\n"),
> > +					priv.fino_blocks,
> > +					be32_to_cpu(agi->agi_fblocks));
> > +	}
> > +
> >  	if (be32_to_cpu(agi->agi_count) != agcnts->agicount) {
> >  		do_warn(_("agi_count %u, counted %u in ag %u\n"),
> >  			 be32_to_cpu(agi->agi_count), agcnts->agicount, agno);
> > 
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 8/9] mkfs: enable the inode btree counter feature
  2020-10-28 17:30   ` Brian Foster
@ 2020-10-29  1:02     ` Darrick J. Wong
  0 siblings, 0 replies; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-29  1:02 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Wed, Oct 28, 2020 at 01:30:38PM -0400, Brian Foster wrote:
> On Mon, Oct 26, 2020 at 04:33:56PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Teach mkfs how to enable the inode btree counter feature.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  man/man8/mkfs.xfs.8 |   15 +++++++++++++++
> >  mkfs/xfs_mkfs.c     |   34 +++++++++++++++++++++++++++++++++-
> >  2 files changed, 48 insertions(+), 1 deletion(-)
> > 
> > 
> > diff --git a/man/man8/mkfs.xfs.8 b/man/man8/mkfs.xfs.8
> > index 0a7858748457..1a6a5f93f0ea 100644
> > --- a/man/man8/mkfs.xfs.8
> > +++ b/man/man8/mkfs.xfs.8
> ...
> > @@ -862,7 +871,8 @@ usage( void )
> >  {
> >  	fprintf(stderr, _("Usage: %s\n\
> >  /* blocksize */		[-b size=num]\n\
> > -/* metadata */		[-m crc=0|1,finobt=0|1,uuid=xxx,rmapbt=0|1,reflink=0|1]\n\
> > +/* metadata */		[-m crc=0|1,finobt=0|1,uuid=xxx,rmapbt=0|1,reflink=0|1,\n\
> > +			    inobtcnt=0|1]\n\
> >  /* data subvol */	[-d agcount=n,agsize=n,file,name=xxx,size=num,\n\
> >  			    (sunit=value,swidth=value|su=num,sw=num|noalign),\n\
> >  			    sectsize=num\n\
> 
> Any plans to add a geometry flag so the feature state is reported on
> success? Otherwise LGTM:

<shrug> This feature doesn't enable any user-visible functionality, so I
don't see much point in doing so.

Thanks for the review!

--D

> Reviewed-by: Brian Foster <bfoster@redhat.com>
> 
> > @@ -1565,6 +1575,9 @@ meta_opts_parser(
> >  	case M_REFLINK:
> >  		cli->sb_feat.reflink = getnum(value, opts, subopt);
> >  		break;
> > +	case M_INOBTCNT:
> > +		cli->sb_feat.inobtcnt = getnum(value, opts, subopt);
> > +		break;
> >  	default:
> >  		return -EINVAL;
> >  	}
> > @@ -1988,6 +2001,22 @@ _("reflink not supported without CRC support\n"));
> >  			usage();
> >  		}
> >  		cli->sb_feat.reflink = false;
> > +
> > +		if (cli->sb_feat.inobtcnt && cli_opt_set(&mopts, M_INOBTCNT)) {
> > +			fprintf(stderr,
> > +_("inode btree counters not supported without CRC support\n"));
> > +			usage();
> > +		}
> > +		cli->sb_feat.inobtcnt = false;
> > +	}
> > +
> > +	if (!cli->sb_feat.finobt) {
> > +		if (cli->sb_feat.inobtcnt && cli_opt_set(&mopts, M_INOBTCNT)) {
> > +			fprintf(stderr,
> > +_("inode btree counters not supported without finobt support\n"));
> > +			usage();
> > +		}
> > +		cli->sb_feat.inobtcnt = false;
> >  	}
> >  
> >  	if ((cli->fsx.fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
> > @@ -2955,6 +2984,8 @@ sb_set_features(
> >  		sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_RMAPBT;
> >  	if (fp->reflink)
> >  		sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_REFLINK;
> > +	if (fp->inobtcnt)
> > +		sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_INOBTCNT;
> >  
> >  	/*
> >  	 * Sparse inode chunk support has two main inode alignment requirements.
> > @@ -3617,6 +3648,7 @@ main(
> >  			.spinodes = true,
> >  			.rmapbt = false,
> >  			.reflink = true,
> > +			.inobtcnt = false,
> >  			.parent_pointers = false,
> >  			.nodalign = false,
> >  			.nortalign = false,
> > 
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 5/9] xfs_db: add inobtcnt upgrade path
  2020-10-29  0:03     ` Darrick J. Wong
@ 2020-10-29 12:09       ` Brian Foster
  2020-10-29 15:42         ` Darrick J. Wong
  0 siblings, 1 reply; 26+ messages in thread
From: Brian Foster @ 2020-10-29 12:09 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Wed, Oct 28, 2020 at 05:03:32PM -0700, Darrick J. Wong wrote:
> On Wed, Oct 28, 2020 at 01:29:25PM -0400, Brian Foster wrote:
> > On Mon, Oct 26, 2020 at 04:33:38PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Enable users to upgrade their filesystems to support inode btree block
> > > counters.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > >  db/sb.c              |   76 +++++++++++++++++++++++++++++++++++++++++++++++++-
> > >  db/xfs_admin.sh      |    4 ++-
> > >  man/man8/xfs_admin.8 |   16 +++++++++++
> > >  3 files changed, 94 insertions(+), 2 deletions(-)
> > > 
> > > 
...
> > > diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
> > > index 8afc873fb50a..65ca6afc1e12 100644
> > > --- a/man/man8/xfs_admin.8
> > > +++ b/man/man8/xfs_admin.8
...
> > > @@ -103,6 +105,20 @@ The filesystem label can be cleared using the special "\c
> > >  " value for
> > >  .IR label .
> > >  .TP
> > > +.BI \-O " feature"
> > > +Add a new feature to the filesystem.
> > > +Only one feature can be specified at a time.
> > > +Features are as follows:
> > > +.RS 0.7i
> > > +.TP
> > > +.B inobtcount
> > > +Upgrade the filesystem to support the inode btree counters feature.
> > > +This reduces mount time by caching the size of the inode btrees in the
> > > +allocation group metadata.
> > > +Once enabled, the filesystem will not be writable by older kernels.
> > > +The filesystem cannot be downgraded after this feature is enabled.
> > 
> > Any reason for not allowing the downgrade path? It seems like we're
> > mostly there implementation wise and that might facilitate enabling the
> > feature by default.
> 
> Downgrading will not be easy for bigtime, since we'd have to scan the
> whole fs to see if there are any timestamps past 2038.  For other
> features it might not be such a big deal, but in general I don't want to
> increase our testing burden even further.
> 

Well it's not something I'd say we should necessarily support for every
feature. TBH, I wasn't expecting an upgrade mechanism for inobtcount in
the first place since I thought we didn't tend to do that for v5
filesystems. ISTM it's simple enough to support and perhaps that's good
enough reason to do so, but if we're going to move the "test burden"
line anyways it seems rather arbitrary to me to support one direction
and not the other when they are presumably of comparable complexity.
Just my .02 of course, and I don't feel strongly about whether we
support upgrade, downgrade, or neither...

Brian

> I'll ask the ext4 folks if they know of people downgrading filesystems
> with tune2fs, but AFAIK it's not generally done.
> 
> --D
> 
> > Brian
> > 
> > > +.RE
> > > +.TP
> > >  .BI \-U " uuid"
> > >  Set the UUID of the filesystem to
> > >  .IR uuid .
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 5/9] xfs_db: add inobtcnt upgrade path
  2020-10-29 12:09       ` Brian Foster
@ 2020-10-29 15:42         ` Darrick J. Wong
  0 siblings, 0 replies; 26+ messages in thread
From: Darrick J. Wong @ 2020-10-29 15:42 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Thu, Oct 29, 2020 at 08:09:05AM -0400, Brian Foster wrote:
> On Wed, Oct 28, 2020 at 05:03:32PM -0700, Darrick J. Wong wrote:
> > On Wed, Oct 28, 2020 at 01:29:25PM -0400, Brian Foster wrote:
> > > On Mon, Oct 26, 2020 at 04:33:38PM -0700, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > 
> > > > Enable users to upgrade their filesystems to support inode btree block
> > > > counters.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > ---
> > > >  db/sb.c              |   76 +++++++++++++++++++++++++++++++++++++++++++++++++-
> > > >  db/xfs_admin.sh      |    4 ++-
> > > >  man/man8/xfs_admin.8 |   16 +++++++++++
> > > >  3 files changed, 94 insertions(+), 2 deletions(-)
> > > > 
> > > > 
> ...
> > > > diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
> > > > index 8afc873fb50a..65ca6afc1e12 100644
> > > > --- a/man/man8/xfs_admin.8
> > > > +++ b/man/man8/xfs_admin.8
> ...
> > > > @@ -103,6 +105,20 @@ The filesystem label can be cleared using the special "\c
> > > >  " value for
> > > >  .IR label .
> > > >  .TP
> > > > +.BI \-O " feature"
> > > > +Add a new feature to the filesystem.
> > > > +Only one feature can be specified at a time.
> > > > +Features are as follows:
> > > > +.RS 0.7i
> > > > +.TP
> > > > +.B inobtcount
> > > > +Upgrade the filesystem to support the inode btree counters feature.
> > > > +This reduces mount time by caching the size of the inode btrees in the
> > > > +allocation group metadata.
> > > > +Once enabled, the filesystem will not be writable by older kernels.
> > > > +The filesystem cannot be downgraded after this feature is enabled.
> > > 
> > > Any reason for not allowing the downgrade path? It seems like we're
> > > mostly there implementation wise and that might facilitate enabling the
> > > feature by default.
> > 
> > Downgrading will not be easy for bigtime, since we'd have to scan the
> > whole fs to see if there are any timestamps past 2038.  For other
> > features it might not be such a big deal, but in general I don't want to
> > increase our testing burden even further.
> > 
> 
> Well it's not something I'd say we should necessarily support for every
> feature. TBH, I wasn't expecting an upgrade mechanism for inobtcount in
> the first place since I thought we didn't tend to do that for v5
> filesystems.

We've not been especially consistent with that -- I think you can
"upgrade" to meta_uuid; at one point Christoph had a series to make it
possible to upgrade to reflink; but all the other features don't support
upgrades.

I picked inobtcount for the initial v5 upgrade support because it seemed
like a small enough feature that people could theoretically backport the
feature to old kernels.  That was supposed to go in ahead of y2038
support (which is my real motivation for allowing some upgrades) but
then they both landed at the same time. ;)

> ISTM it's simple enough to support and perhaps that's good
> enough reason to do so, but if we're going to move the "test burden"
> line anyways it seems rather arbitrary to me to support one direction
> and not the other when they are presumably of comparable complexity.
> Just my .02 of course, and I don't feel strongly about whether we
> support upgrade, downgrade, or neither...

I think the only features that we've supported downgrading are attr2 ->
attr1 and (in the old days) unwritten extents.

--D

> Brian
> 
> > I'll ask the ext4 folks if they know of people downgrading filesystems
> > with tune2fs, but AFAIK it's not generally done.
> > 
> > --D
> > 
> > > Brian
> > > 
> > > > +.RE
> > > > +.TP
> > > >  .BI \-U " uuid"
> > > >  Set the UUID of the filesystem to
> > > >  .IR uuid .
> > > > 
> > > 
> > 
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v2 6/9] xfs_repair: check inode btree block counters in AGI
  2020-10-26 23:33 ` [PATCH 6/9] xfs_repair: check inode btree block counters in AGI Darrick J. Wong
  2020-10-28 17:29   ` Brian Foster
@ 2020-11-16 17:19   ` Darrick J. Wong
  2020-11-16 20:29     ` Eric Sandeen
  1 sibling, 1 reply; 26+ messages in thread
From: Darrick J. Wong @ 2020-11-16 17:19 UTC (permalink / raw)
  To: sandeen, bfoster; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Make sure that both inode btree block counters in the AGI are correct.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
v2: fix backwards complaint message, consolidate switch statements
---
 repair/scan.c |   29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/repair/scan.c b/repair/scan.c
index 2a38ae5197c6..44e794a0ac5a 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -1948,6 +1948,12 @@ _("invalid inode count, inode chunk %d/%u, count %d ninodes %d\n"),
 	return suspect;
 }
 
+struct ino_priv {
+	struct aghdr_cnts	*agcnts;
+	uint32_t		ino_blocks;
+	uint32_t		fino_blocks;
+};
+
 /*
  * this one walks the inode btrees sucking the info there into
  * the incore avl tree.  We try and rescue corrupted btree records
@@ -1976,7 +1982,8 @@ scan_inobt(
 	void			*priv,
 	const struct xfs_buf_ops *ops)
 {
-	struct aghdr_cnts	*agcnts = priv;
+	struct ino_priv		*ipriv = priv;
+	struct aghdr_cnts	*agcnts = ipriv->agcnts;
 	char			*name;
 	xfs_agino_t		lastino = 0;
 	int			i;
@@ -1994,10 +2001,12 @@ scan_inobt(
 	case XFS_FIBT_MAGIC:
 	case XFS_FIBT_CRC_MAGIC:
 		name = "fino";
+		ipriv->fino_blocks++;
 		break;
 	case XFS_IBT_MAGIC:
 	case XFS_IBT_CRC_MAGIC:
 		name = "ino";
+		ipriv->ino_blocks++;
 		break;
 	default:
 		name = "(unknown)";
@@ -2363,6 +2372,9 @@ validate_agi(
 	xfs_agnumber_t		agno,
 	struct aghdr_cnts	*agcnts)
 {
+	struct ino_priv		priv = {
+		.agcnts = agcnts,
+	};
 	xfs_agblock_t		bno;
 	int			i;
 	uint32_t		magic;
@@ -2372,7 +2384,7 @@ validate_agi(
 		magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_IBT_CRC_MAGIC
 							 : XFS_IBT_MAGIC;
 		scan_sbtree(bno, be32_to_cpu(agi->agi_level),
-			    agno, 0, scan_inobt, 1, magic, agcnts,
+			    agno, 0, scan_inobt, 1, magic, &priv,
 			    &xfs_inobt_buf_ops);
 	} else {
 		do_warn(_("bad agbno %u for inobt root, agno %d\n"),
@@ -2385,7 +2397,7 @@ validate_agi(
 			magic = xfs_sb_version_hascrc(&mp->m_sb) ?
 					XFS_FIBT_CRC_MAGIC : XFS_FIBT_MAGIC;
 			scan_sbtree(bno, be32_to_cpu(agi->agi_free_level),
-				    agno, 0, scan_inobt, 1, magic, agcnts,
+				    agno, 0, scan_inobt, 1, magic, &priv,
 				    &xfs_finobt_buf_ops);
 		} else {
 			do_warn(_("bad agbno %u for finobt root, agno %d\n"),
@@ -2393,6 +2405,17 @@ validate_agi(
 		}
 	}
 
+	if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+		if (be32_to_cpu(agi->agi_iblocks) != priv.ino_blocks)
+			do_warn(_("bad inobt block count %u, saw %u\n"),
+					be32_to_cpu(agi->agi_iblocks),
+					priv.ino_blocks);
+		if (be32_to_cpu(agi->agi_fblocks) != priv.fino_blocks)
+			do_warn(_("bad finobt block count %u, saw %u\n"),
+					be32_to_cpu(agi->agi_fblocks),
+					priv.fino_blocks);
+	}
+
 	if (be32_to_cpu(agi->agi_count) != agcnts->agicount) {
 		do_warn(_("agi_count %u, counted %u in ag %u\n"),
 			 be32_to_cpu(agi->agi_count), agcnts->agicount, agno);

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 6/9] xfs_repair: check inode btree block counters in AGI
  2020-11-16 17:19   ` [PATCH v2 " Darrick J. Wong
@ 2020-11-16 20:29     ` Eric Sandeen
  0 siblings, 0 replies; 26+ messages in thread
From: Eric Sandeen @ 2020-11-16 20:29 UTC (permalink / raw)
  To: Darrick J. Wong, bfoster; +Cc: linux-xfs

On 11/16/20 11:19 AM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Make sure that both inode btree block counters in the AGI are correct.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
> v2: fix backwards complaint message, consolidate switch statements
> ---
>  repair/scan.c |   29 ++++++++++++++++++++++++++---

Looks like this fixes Brian's concerns, thanks both!

Reviewed-by: Eric Sandeen <sandeen@redhat.com>



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v2 5/9] xfs_db: add inobtcnt upgrade path
  2020-10-26 23:33 ` [PATCH 5/9] xfs_db: add inobtcnt upgrade path Darrick J. Wong
  2020-10-28 17:29   ` Brian Foster
@ 2020-11-16 21:13   ` Darrick J. Wong
  2020-11-18 21:05     ` Eric Sandeen
  1 sibling, 1 reply; 26+ messages in thread
From: Darrick J. Wong @ 2020-11-16 21:13 UTC (permalink / raw)
  To: sandeen, bfoster; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Enable users to upgrade their filesystems to support inode btree block
counters.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
v2: set inprogress to force repair (which xfs_admin immediately does),
clean up the code to pass around fewer arguments, and try to revert the
change if we hit io errors
---
 db/sb.c              |  149 ++++++++++++++++++++++++++++++++++++++++++++++++--
 db/xfs_admin.sh      |    6 ++
 man/man8/xfs_admin.8 |   16 +++++
 3 files changed, 163 insertions(+), 8 deletions(-)

diff --git a/db/sb.c b/db/sb.c
index e3b1fe0b2e6e..cfc2e32023fc 100644
--- a/db/sb.c
+++ b/db/sb.c
@@ -620,6 +620,117 @@ do_version(xfs_agnumber_t agno, uint16_t version, uint32_t features)
 	return 1;
 }
 
+struct v5feat {
+	uint32_t		compat;
+	uint32_t		ro_compat;
+	uint32_t		incompat;
+	uint32_t		log_incompat;
+};
+
+static void
+get_v5_features(
+	struct xfs_mount	*mp,
+	struct v5feat		*feat)
+{
+	feat->compat = mp->m_sb.sb_features_compat;
+	feat->ro_compat = mp->m_sb.sb_features_ro_compat;
+	feat->incompat = mp->m_sb.sb_features_incompat;
+	feat->log_incompat = mp->m_sb.sb_features_log_incompat;
+}
+
+static bool
+set_v5_features(
+	struct xfs_mount	*mp,
+	const struct v5feat	*upgrade)
+{
+	struct xfs_sb		tsb;
+	struct v5feat		old;
+	xfs_agnumber_t		agno = 0;
+	xfs_agnumber_t		revert_agno = 0;
+
+	if (upgrade->compat == mp->m_sb.sb_features_compat &&
+	    upgrade->ro_compat == mp->m_sb.sb_features_ro_compat &&
+	    upgrade->incompat == mp->m_sb.sb_features_incompat &&
+	    upgrade->log_incompat == mp->m_sb.sb_features_log_incompat)
+		return true;
+
+	dbprintf(_("Upgrading V5 filesystem\n"));
+
+	/* Upgrade primary superblock. */
+	if (!get_sb(agno, &tsb))
+		goto fail;
+
+	/* Save old values */
+	old.compat = tsb.sb_features_compat;
+	old.ro_compat = tsb.sb_features_ro_compat;
+	old.incompat = tsb.sb_features_incompat;
+	old.log_incompat = tsb.sb_features_log_incompat;
+
+	/* Update feature flags and force user to run repair before mounting. */
+	tsb.sb_features_compat |= upgrade->compat;
+	tsb.sb_features_ro_compat |= upgrade->ro_compat;
+	tsb.sb_features_incompat |= upgrade->incompat;
+	tsb.sb_features_log_incompat |= upgrade->log_incompat;
+	tsb.sb_inprogress = 1;
+	libxfs_sb_to_disk(iocur_top->data, &tsb);
+
+	/* Write new primary superblock */
+	write_cur();
+	if (!iocur_top->bp || iocur_top->bp->b_error)
+		goto fail;
+
+	/* Update the secondary superblocks, or revert. */
+	for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) {
+		if (!get_sb(agno, &tsb)) {
+			agno--;
+			goto revert;
+		}
+
+		/* Set features on secondary suepr. */
+		tsb.sb_features_compat |= upgrade->compat;
+		tsb.sb_features_ro_compat |= upgrade->ro_compat;
+		tsb.sb_features_incompat |= upgrade->incompat;
+		tsb.sb_features_log_incompat |= upgrade->log_incompat;
+		libxfs_sb_to_disk(iocur_top->data, &tsb);
+		write_cur();
+
+		/* Write or abort. */
+		if (!iocur_top->bp || iocur_top->bp->b_error)
+			goto revert;
+	}
+
+	/* All superblocks updated, update the incore values. */
+	mp->m_sb.sb_features_compat |= upgrade->compat;
+	mp->m_sb.sb_features_ro_compat |= upgrade->ro_compat;
+	mp->m_sb.sb_features_incompat |= upgrade->incompat;
+	mp->m_sb.sb_features_log_incompat |= upgrade->log_incompat;
+
+	dbprintf(_("Upgraded V5 filesystem.  Please run xfs_repair.\n"));
+	return true;
+
+revert:
+	/*
+	 * Try to revert feature flag changes, and don't worry if we fail.
+	 * We're probably in a mess anyhow, and the admin will have to run
+	 * repair anyways.
+	 */
+	for (revert_agno = 0; revert_agno <= agno; revert_agno++) {
+		if (!get_sb(revert_agno, &tsb))
+			continue;
+
+		tsb.sb_features_compat = old.compat;
+		tsb.sb_features_ro_compat = old.ro_compat;
+		tsb.sb_features_incompat = old.incompat;
+		tsb.sb_features_log_incompat = old.log_incompat;
+		libxfs_sb_to_disk(iocur_top->data, &tsb);
+		write_cur();
+	}
+fail:
+	dbprintf(
+_("Failed to upgrade V5 filesystem at AG %d, please run xfs_repair.\n"), agno);
+	return false;
+}
+
 static char *
 version_string(
 	xfs_sb_t	*sbp)
@@ -692,12 +803,7 @@ version_string(
 	return s;
 }
 
-/*
- * XXX: this only supports reading and writing to version 4 superblock fields.
- * V5 superblocks always define certain V4 feature bits - they are blocked from
- * being changed if a V5 sb is detected, but otherwise v5 superblock features
- * are not handled here.
- */
+/* Upgrade a superblock to support a feature. */
 static int
 version_f(
 	int		argc,
@@ -708,6 +814,9 @@ version_f(
 	xfs_agnumber_t	ag;
 
 	if (argc == 2) {	/* WRITE VERSION */
+		struct v5feat	v5features;
+
+		get_v5_features(mp, &v5features);
 
 		if ((x.isreadonly & LIBXFS_ISREADONLY) || !expert_mode) {
 			dbprintf(_("%s: not in expert mode, writing disabled\n"),
@@ -716,7 +825,28 @@ version_f(
 		}
 
 		/* Logic here derived from the IRIX xfs_chver(1M) script. */
-		if (!strcasecmp(argv[1], "extflg")) {
+		if (!strcasecmp(argv[1], "inobtcount")) {
+			if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+				dbprintf(
+		_("inode btree counter feature is already enabled\n"));
+				exitcode = 1;
+				return 1;
+			}
+			if (!xfs_sb_version_hasfinobt(&mp->m_sb)) {
+				dbprintf(
+		_("inode btree counter feature cannot be enabled on filesystems lacking free inode btrees\n"));
+				exitcode = 1;
+				return 1;
+			}
+			if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+				dbprintf(
+		_("inode btree counter feature cannot be enabled on pre-V5 filesystems\n"));
+				exitcode = 1;
+				return 1;
+			}
+
+			v5features.ro_compat |= XFS_SB_FEAT_RO_COMPAT_INOBTCNT;
+		} else if (!strcasecmp(argv[1], "extflg")) {
 			switch (XFS_SB_VERSION_NUM(&mp->m_sb)) {
 			case XFS_SB_VERSION_1:
 				version = 0x0004 | XFS_SB_VERSION_EXTFLGBIT;
@@ -807,6 +937,11 @@ version_f(
 			mp->m_sb.sb_versionnum = version;
 			mp->m_sb.sb_features2 = features;
 		}
+
+		if (!set_v5_features(mp, &v5features)) {
+			exitcode = 1;
+			return 1;
+		}
 	}
 
 	if (argc == 3) {	/* VERSIONNUM + FEATURES2 */
diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh
index bd325da2f776..41a14d4521ba 100755
--- a/db/xfs_admin.sh
+++ b/db/xfs_admin.sh
@@ -9,7 +9,7 @@ DB_OPTS=""
 REPAIR_OPTS=""
 USAGE="Usage: xfs_admin [-efjlpuV] [-c 0|1] [-L label] [-U uuid] device [logdev]"
 
-while getopts "efjlpuc:L:U:V" c
+while getopts "efjlpuc:L:O:U:V" c
 do
 	case $c in
 	c)	REPAIR_OPTS=$REPAIR_OPTS" -c lazycount="$OPTARG;;
@@ -19,6 +19,9 @@ do
 	l)	DB_OPTS=$DB_OPTS" -r -c label";;
 	L)	DB_OPTS=$DB_OPTS" -c 'label "$OPTARG"'";;
 	p)	DB_OPTS=$DB_OPTS" -c 'version projid32bit'";;
+	O)	DB_OPTS=$DB_OPTS" -c 'version "$OPTARG"'";
+		# Force repair to run by adding a single space to REPAIR_OPTS
+		REPAIR_OPTS="$REPAIR_OPTS ";;
 	u)	DB_OPTS=$DB_OPTS" -r -c uuid";;
 	U)	DB_OPTS=$DB_OPTS" -c 'uuid "$OPTARG"'";;
 	V)	xfs_db -p xfs_admin -V
@@ -48,6 +51,7 @@ case $# in
 		fi
 		if [ -n "$REPAIR_OPTS" ]
 		then
+			echo "Running xfs_repair to ensure filesystem consistency."
 			# Hide normal repair output which is sent to stderr
 			# assuming the filesystem is fine when a user is
 			# running xfs_admin.
diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
index 8afc873fb50a..65ca6afc1e12 100644
--- a/man/man8/xfs_admin.8
+++ b/man/man8/xfs_admin.8
@@ -6,6 +6,8 @@ xfs_admin \- change parameters of an XFS filesystem
 [
 .B \-eflpu
 ] [
+.BR \-O " feature"
+] [
 .BR "\-c 0" | 1
 ] [
 .B \-L
@@ -103,6 +105,20 @@ The filesystem label can be cleared using the special "\c
 " value for
 .IR label .
 .TP
+.BI \-O " feature"
+Add a new feature to the filesystem.
+Only one feature can be specified at a time.
+Features are as follows:
+.RS 0.7i
+.TP
+.B inobtcount
+Upgrade the filesystem to support the inode btree counters feature.
+This reduces mount time by caching the size of the inode btrees in the
+allocation group metadata.
+Once enabled, the filesystem will not be writable by older kernels.
+The filesystem cannot be downgraded after this feature is enabled.
+.RE
+.TP
 .BI \-U " uuid"
 Set the UUID of the filesystem to
 .IR uuid .

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 5/9] xfs_db: add inobtcnt upgrade path
  2020-11-16 21:13   ` [PATCH v2 " Darrick J. Wong
@ 2020-11-18 21:05     ` Eric Sandeen
  2020-11-20  1:05       ` Darrick J. Wong
  0 siblings, 1 reply; 26+ messages in thread
From: Eric Sandeen @ 2020-11-18 21:05 UTC (permalink / raw)
  To: Darrick J. Wong, bfoster; +Cc: linux-xfs

On 11/16/20 3:13 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Enable users to upgrade their filesystems to support inode btree block
> counters.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
> v2: set inprogress to force repair (which xfs_admin immediately does),
> clean up the code to pass around fewer arguments, and try to revert the
> change if we hit io errors
> ---

sooooo the inprogress thing sets off some unexpected behavior.

In testing this, I noticed that if we have inprogress set, and uknown features/
version on disk, we go looking for backup superblocks and actually end up
corrupting the filesystem before bailing out:

# xfs_repair /dev/pmem0p2 
Phase 1 - find and verify superblock...
bad primary superblock - filesystem mkfs-in-progress bit set !!!

attempting to find secondary superblock...
.found candidate secondary superblock...
verified secondary superblock...
writing modified primary superblock
sb realtime bitmap inode value 18446744073709551615 (NULLFSINO) inconsistent with calculated value 129
resetting superblock realtime bitmap inode pointer to 129
sb realtime summary inode value 18446744073709551615 (NULLFSINO) inconsistent with calculated value 130
resetting superblock realtime summary inode pointer to 130
Superblock has unknown compat/rocompat/incompat features (0x0/0x8/0x0).
Using a more recent xfs_repair is recommended.
Found unsupported filesystem features.  Exiting now.

# xfs_db -c check /dev/pmem0p2
disconnected inode 129, nlink 1
disconnected inode 130, nlink 1

so this seems to have exposed a hole in how repair deals with unknown features
when the inprogress bit is set.

And TBH scampering off to find backup superblocks to "repair" an inprogress
filesystem seems like ... not the right thing to do after a feature upgrade.

I'm not sure what's better, but 

> bad primary superblock - filesystem mkfs-in-progress bit set !!!

seems ... unexpected for this purpose.

-Eric


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 5/9] xfs_db: add inobtcnt upgrade path
  2020-11-18 21:05     ` Eric Sandeen
@ 2020-11-20  1:05       ` Darrick J. Wong
  2020-11-20  4:10         ` Eric Sandeen
  0 siblings, 1 reply; 26+ messages in thread
From: Darrick J. Wong @ 2020-11-20  1:05 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: bfoster, linux-xfs

On Wed, Nov 18, 2020 at 03:05:42PM -0600, Eric Sandeen wrote:
> On 11/16/20 3:13 PM, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Enable users to upgrade their filesystems to support inode btree block
> > counters.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> > v2: set inprogress to force repair (which xfs_admin immediately does),
> > clean up the code to pass around fewer arguments, and try to revert the
> > change if we hit io errors
> > ---
> 
> sooooo the inprogress thing sets off some unexpected behavior.
> 
> In testing this, I noticed that if we have inprogress set, and uknown features/
> version on disk, we go looking for backup superblocks and actually end up
> corrupting the filesystem before bailing out:
> 
> # xfs_repair /dev/pmem0p2 
> Phase 1 - find and verify superblock...
> bad primary superblock - filesystem mkfs-in-progress bit set !!!
> 
> attempting to find secondary superblock...
> .found candidate secondary superblock...
> verified secondary superblock...
> writing modified primary superblock
> sb realtime bitmap inode value 18446744073709551615 (NULLFSINO) inconsistent with calculated value 129
> resetting superblock realtime bitmap inode pointer to 129
> sb realtime summary inode value 18446744073709551615 (NULLFSINO) inconsistent with calculated value 130
> resetting superblock realtime summary inode pointer to 130
> Superblock has unknown compat/rocompat/incompat features (0x0/0x8/0x0).
> Using a more recent xfs_repair is recommended.
> Found unsupported filesystem features.  Exiting now.
> 
> # xfs_db -c check /dev/pmem0p2
> disconnected inode 129, nlink 1
> disconnected inode 130, nlink 1
> 
> so this seems to have exposed a hole in how repair deals with unknown features
> when the inprogress bit is set.
> 
> And TBH scampering off to find backup superblocks to "repair" an inprogress
> filesystem seems like ... not the right thing to do after a feature upgrade.
> 
> I'm not sure what's better, but 
> 
> > bad primary superblock - filesystem mkfs-in-progress bit set !!!
> 
> seems ... unexpected for this purpose.

Yeah.  Dave suggested that I use an incompat flag for this, so I think
I'll do that instead since inprogress is such a mess.

--D

> -Eric
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 5/9] xfs_db: add inobtcnt upgrade path
  2020-11-20  1:05       ` Darrick J. Wong
@ 2020-11-20  4:10         ` Eric Sandeen
  0 siblings, 0 replies; 26+ messages in thread
From: Eric Sandeen @ 2020-11-20  4:10 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: bfoster, linux-xfs

On 11/19/20 7:05 PM, Darrick J. Wong wrote:
> On Wed, Nov 18, 2020 at 03:05:42PM -0600, Eric Sandeen wrote:

...

>> so this seems to have exposed a hole in how repair deals with unknown features
>> when the inprogress bit is set.
>>
>> And TBH scampering off to find backup superblocks to "repair" an inprogress
>> filesystem seems like ... not the right thing to do after a feature upgrade.
>>
>> I'm not sure what's better, but 
>>
>>> bad primary superblock - filesystem mkfs-in-progress bit set !!!
>>
>> seems ... unexpected for this purpose.
> 
> Yeah.  Dave suggested that I use an incompat flag for this, so I think
> I'll do that instead since inprogress is such a mess.

And I'll try to get to validating V5 features earlier in the repair cycle ...

-Eric

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2020-11-20  4:10 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-26 23:33 [PATCH v4 0/9] xfsprogs: add a inode btree blocks counts to the AGI header Darrick J. Wong
2020-10-26 23:33 ` [PATCH 1/9] xfs: store inode btree block counts in " Darrick J. Wong
2020-10-26 23:33 ` [PATCH 2/9] xfs: use the finobt block counts to speed up mount times Darrick J. Wong
2020-10-26 23:33 ` [PATCH 3/9] xfs: support inode btree blockcounts in online repair Darrick J. Wong
2020-10-26 23:33 ` [PATCH 4/9] xfs_db: support displaying inode btree block counts in AGI header Darrick J. Wong
2020-10-28 17:28   ` Brian Foster
2020-10-26 23:33 ` [PATCH 5/9] xfs_db: add inobtcnt upgrade path Darrick J. Wong
2020-10-28 17:29   ` Brian Foster
2020-10-29  0:03     ` Darrick J. Wong
2020-10-29 12:09       ` Brian Foster
2020-10-29 15:42         ` Darrick J. Wong
2020-11-16 21:13   ` [PATCH v2 " Darrick J. Wong
2020-11-18 21:05     ` Eric Sandeen
2020-11-20  1:05       ` Darrick J. Wong
2020-11-20  4:10         ` Eric Sandeen
2020-10-26 23:33 ` [PATCH 6/9] xfs_repair: check inode btree block counters in AGI Darrick J. Wong
2020-10-28 17:29   ` Brian Foster
2020-10-29  1:01     ` Darrick J. Wong
2020-11-16 17:19   ` [PATCH v2 " Darrick J. Wong
2020-11-16 20:29     ` Eric Sandeen
2020-10-26 23:33 ` [PATCH 7/9] xfs_repair: regenerate " Darrick J. Wong
2020-10-28 17:30   ` Brian Foster
2020-10-26 23:33 ` [PATCH 8/9] mkfs: enable the inode btree counter feature Darrick J. Wong
2020-10-28 17:30   ` Brian Foster
2020-10-29  1:02     ` Darrick J. Wong
2020-10-26 23:34 ` [PATCH 9/9] xfs: enable new inode btree counters feature Darrick J. Wong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.