linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: djwong@kernel.org, david@fromorbit.com
Cc: Chandan Babu R <chandan.babu@oracle.com>,
	linux-xfs@vger.kernel.org, chandan.babu@oracle.com, hch@lst.de
Subject: [PATCH 11/15] xfs: compute the maximum height of the rmap btree when reflink enabled
Date: Tue, 12 Oct 2021 16:33:34 -0700	[thread overview]
Message-ID: <163408161434.4151249.874557928540897102.stgit@magnolia> (raw)
In-Reply-To: <163408155346.4151249.8364703447365270670.stgit@magnolia>

From: Darrick J. Wong <djwong@kernel.org>

Instead of assuming that the hardcoded XFS_BTREE_MAXLEVELS value is big
enough to handle the maximally tall rmap btree when all blocks are in
use and maximally shared, let's compute the maximum height assuming the
rmapbt consumes as many blocks as possible.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Chandan Babu R <chandan.babu@oracle.com>
---
 fs/xfs/libxfs/xfs_btree.c       |   34 ++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_btree.h       |    2 +
 fs/xfs/libxfs/xfs_rmap_btree.c  |   55 ++++++++++++++++++++++++---------------
 fs/xfs/libxfs/xfs_trans_resv.c  |   13 +++++++++
 fs/xfs/libxfs/xfs_trans_space.h |    7 +++++
 5 files changed, 90 insertions(+), 21 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 6ced8f028d47..201b81d54622 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4531,6 +4531,40 @@ xfs_btree_compute_maxlevels(
 	return level;
 }
 
+/*
+ * Compute the maximum height of a btree that is allowed to consume up to the
+ * given number of blocks.
+ */
+unsigned int
+xfs_btree_compute_maxlevels_size(
+	unsigned long long	max_btblocks,
+	unsigned int		leaf_mnr)
+{
+	unsigned long long	leaf_blocks = leaf_mnr;
+	unsigned long long	blocks_left;
+	unsigned int		maxlevels;
+
+	if (max_btblocks < 1)
+		return 0;
+
+	/*
+	 * The loop increments maxlevels as long as there would be enough
+	 * blocks left in the reservation to handle each node block at the
+	 * current level pointing to the minimum possible number of leaf blocks
+	 * at the next level down.  We start the loop assuming a single-level
+	 * btree consuming one block.
+	 */
+	maxlevels = 1;
+	blocks_left = max_btblocks - 1;
+	while (leaf_blocks < blocks_left) {
+		maxlevels++;
+		blocks_left -= leaf_blocks;
+		leaf_blocks *= leaf_mnr;
+	}
+
+	return maxlevels;
+}
+
 /*
  * Query a regular btree for all records overlapping a given interval.
  * Start with a LE lookup of the key of low_rec and return all records
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index b8761a2fc24b..fccb374a8399 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -483,6 +483,8 @@ xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp,
 		unsigned int max_recs);
 
 uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len);
+unsigned int xfs_btree_compute_maxlevels_size(unsigned long long max_btblocks,
+		unsigned int leaf_mnr);
 unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len);
 
 /*
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 1b48b7b3ee30..b1b55a6e7d25 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -538,28 +538,41 @@ xfs_rmapbt_maxrecs(
 /* Compute the maximum height of an rmap btree. */
 void
 xfs_rmapbt_compute_maxlevels(
-	struct xfs_mount		*mp)
+	struct xfs_mount	*mp)
 {
-	/*
-	 * On a non-reflink filesystem, the maximum number of rmap
-	 * records is the number of blocks in the AG, hence the max
-	 * rmapbt height is log_$maxrecs($agblocks).  However, with
-	 * reflink each AG block can have up to 2^32 (per the refcount
-	 * record format) owners, which means that theoretically we
-	 * could face up to 2^64 rmap records.
-	 *
-	 * That effectively means that the max rmapbt height must be
-	 * XFS_BTREE_MAXLEVELS.  "Fortunately" we'll run out of AG
-	 * blocks to feed the rmapbt long before the rmapbt reaches
-	 * maximum height.  The reflink code uses ag_resv_critical to
-	 * disallow reflinking when less than 10% of the per-AG metadata
-	 * block reservation since the fallback is a regular file copy.
-	 */
-	if (xfs_has_reflink(mp))
-		mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS;
-	else
-		mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
-				mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
+	unsigned int		val;
+
+	if (!xfs_has_rmapbt(mp)) {
+		mp->m_rmap_maxlevels = 0;
+		return;
+	}
+
+	if (xfs_has_reflink(mp)) {
+		/*
+		 * Compute the asymptotic maxlevels for an rmap btree on a
+		 * filesystem that supports reflink.
+		 *
+		 * On a reflink filesystem, each AG block can have up to 2^32
+		 * (per the refcount record format) owners, which means that
+		 * theoretically we could face up to 2^64 rmap records.
+		 * However, we're likely to run out of blocks in the AG long
+		 * before that happens, which means that we must compute the
+		 * max height based on what the btree will look like if it
+		 * consumes almost all the blocks in the AG due to maximal
+		 * sharing factor.
+		 */
+		val = xfs_btree_compute_maxlevels_size(mp->m_sb.sb_agblocks,
+				mp->m_rmap_mnr[1]);
+	} else {
+		/*
+		 * If there's no block sharing, compute the maximum rmapbt
+		 * height assuming one rmap record per AG block.
+		 */
+		val = xfs_btree_compute_maxlevels(mp->m_rmap_mnr,
+				mp->m_sb.sb_agblocks);
+	}
+
+	mp->m_rmap_maxlevels = val;
 }
 
 /* Calculate the refcount btree size for some records. */
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 5e300daa2559..97bd17d84a23 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -814,6 +814,16 @@ xfs_trans_resv_calc(
 	struct xfs_mount	*mp,
 	struct xfs_trans_resv	*resp)
 {
+	unsigned int		rmap_maxlevels = mp->m_rmap_maxlevels;
+
+	/*
+	 * In the early days of rmap+reflink, we always set the rmap maxlevels
+	 * to 9 even if the AG was small enough that it would never grow to
+	 * that height.
+	 */
+	if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp))
+		mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS;
+
 	/*
 	 * The following transactions are logged in physical format and
 	 * require a permanent reservation on space.
@@ -916,4 +926,7 @@ xfs_trans_resv_calc(
 	resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
 	resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
 	resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);
+
+	/* Put everything back the way it was.  This goes at the end. */
+	mp->m_rmap_maxlevels = rmap_maxlevels;
 }
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index 50332be34388..440c9c390b86 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -17,6 +17,13 @@
 /* Adding one rmap could split every level up to the top of the tree. */
 #define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels)
 
+/*
+ * Note that we historically set m_rmap_maxlevels to 9 when reflink was
+ * enabled, so we must preserve this behavior to avoid changing the transaction
+ * space reservations.
+ */
+#define XFS_OLD_REFLINK_RMAP_MAXLEVELS	(9)
+
 /* Blocks we might need to add "b" rmaps to a tree. */
 #define XFS_NRMAPADD_SPACE_RES(mp, b)\
 	(((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \


  parent reply	other threads:[~2021-10-12 23:33 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-12 23:32 [PATCHSET v3 00/15] xfs: support dynamic btree cursor height Darrick J. Wong
2021-10-12 23:32 ` [PATCH 01/15] xfs: remove xfs_btree_cur.bc_blocklog Darrick J. Wong
2021-10-13  0:56   ` Dave Chinner
2021-10-12 23:32 ` [PATCH 02/15] xfs: reduce the size of nr_ops for refcount btree cursors Darrick J. Wong
2021-10-13  0:57   ` Dave Chinner
2021-10-12 23:32 ` [PATCH 03/15] xfs: don't track firstrec/firstkey separately in xchk_btree Darrick J. Wong
2021-10-13  1:02   ` Dave Chinner
2021-10-12 23:32 ` [PATCH 04/15] xfs: dynamically allocate btree scrub context structure Darrick J. Wong
2021-10-13  4:57   ` Dave Chinner
2021-10-13 16:29     ` Darrick J. Wong
2021-10-12 23:33 ` [PATCH 05/15] xfs: support dynamic btree cursor heights Darrick J. Wong
2021-10-13  5:31   ` Dave Chinner
2021-10-13 16:52     ` Darrick J. Wong
2021-10-13 21:14       ` Dave Chinner
2021-10-12 23:33 ` [PATCH 06/15] xfs: rearrange xfs_btree_cur fields for better packing Darrick J. Wong
2021-10-13  5:34   ` Dave Chinner
2021-10-12 23:33 ` [PATCH 07/15] xfs: refactor btree cursor allocation function Darrick J. Wong
2021-10-13  5:34   ` Dave Chinner
2021-10-12 23:33 ` [PATCH 08/15] xfs: encode the max btree height in the cursor Darrick J. Wong
2021-10-13  5:38   ` Dave Chinner
2021-10-12 23:33 ` [PATCH 09/15] xfs: dynamically allocate cursors based on maxlevels Darrick J. Wong
2021-10-13  5:40   ` Dave Chinner
2021-10-13 16:55     ` Darrick J. Wong
2021-10-12 23:33 ` [PATCH 10/15] xfs: compute actual maximum btree height for critical reservation calculation Darrick J. Wong
2021-10-13  5:49   ` Dave Chinner
2021-10-13 17:07     ` Darrick J. Wong
2021-10-13 20:18       ` Dave Chinner
2021-10-12 23:33 ` Darrick J. Wong [this message]
2021-10-13  7:25   ` [PATCH 11/15] xfs: compute the maximum height of the rmap btree when reflink enabled Dave Chinner
2021-10-13 17:47     ` Darrick J. Wong
2021-10-12 23:33 ` [PATCH 12/15] xfs: kill XFS_BTREE_MAXLEVELS Darrick J. Wong
2021-10-13  7:25   ` Dave Chinner
2021-10-12 23:33 ` [PATCH 13/15] xfs: widen btree maxlevels computation to handle 64-bit record counts Darrick J. Wong
2021-10-13  7:28   ` Dave Chinner
2021-10-12 23:33 ` [PATCH 14/15] xfs: compute absolute maximum nlevels for each btree type Darrick J. Wong
2021-10-13  7:57   ` Dave Chinner
2021-10-13 21:36     ` Darrick J. Wong
2021-10-13 23:48       ` Dave Chinner
2021-10-12 23:33 ` [PATCH 15/15] xfs: use separate btree cursor cache " Darrick J. Wong
2021-10-13  8:01   ` Dave Chinner
2021-10-13 21:42     ` Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=163408161434.4151249.874557928540897102.stgit@magnolia \
    --to=djwong@kernel.org \
    --cc=chandan.babu@oracle.com \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).