All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chandan Babu R <chandan.babu@oracle.com>
To: linux-xfs@vger.kernel.org
Cc: djwong@kernel.org, david@fromorbit.com, chandan.babu@oracle.com
Subject: [PATCH V9.2] xfs: Directory's data fork extent counter can never overflow
Date: Tue, 12 Apr 2022 19:32:37 +0530	[thread overview]
Message-ID: <20220412140237.1759506-1-chandan.babu@oracle.com> (raw)
In-Reply-To: <20220406061904.595597-16-chandan.babu@oracle.com>

The maximum file size that can be represented by the data fork extent counter
in the worst case occurs when all extents are 1 block in length and each block
is 1KB in size.

With XFS_MAX_EXTCNT_DATA_FORK_SMALL representing maximum extent count and with
1KB sized blocks, a file can reach upto,
(2^31) * 1KB = 2TB

This is much larger than the theoretical maximum size of a directory
i.e. XFS_DIR2_SPACE_SIZE * 3 = ~96GB.

Since a directory's inode can never overflow its data fork extent counter,
this commit removes all the overflow checks associated with
it. xfs_dinode_verify() now performs a rough check to verify if a diretory's
data fork is larger than 96GB.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c       | 20 -------------
 fs/xfs/libxfs/xfs_da_btree.h   |  1 +
 fs/xfs/libxfs/xfs_da_format.h  |  1 +
 fs/xfs/libxfs/xfs_dir2.c       |  8 +++++
 fs/xfs/libxfs/xfs_format.h     | 13 ++++++++
 fs/xfs/libxfs/xfs_inode_buf.c  |  3 ++
 fs/xfs/libxfs/xfs_inode_fork.h | 13 --------
 fs/xfs/xfs_inode.c             | 55 ++--------------------------------
 fs/xfs/xfs_symlink.c           |  5 ----
 9 files changed, 28 insertions(+), 91 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 1254d4d4821e..4fab0c92ab70 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5147,26 +5147,6 @@ xfs_bmap_del_extent_real(
 		 * Deleting the middle of the extent.
 		 */
 
-		/*
-		 * For directories, -ENOSPC is returned since a directory entry
-		 * remove operation must not fail due to low extent count
-		 * availability. -ENOSPC will be handled by higher layers of XFS
-		 * by letting the corresponding empty Data/Free blocks to linger
-		 * until a future remove operation. Dabtree blocks would be
-		 * swapped with the last block in the leaf space and then the
-		 * new last block will be unmapped.
-		 *
-		 * The above logic also applies to the source directory entry of
-		 * a rename operation.
-		 */
-		error = xfs_iext_count_may_overflow(ip, whichfork, 1);
-		if (error) {
-			ASSERT(S_ISDIR(VFS_I(ip)->i_mode) &&
-				whichfork == XFS_DATA_FORK);
-			error = -ENOSPC;
-			goto done;
-		}
-
 		old = got;
 
 		got.br_blockcount = del->br_startoff - got.br_startoff;
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 0faf7d9ac241..7f08f6de48bf 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -30,6 +30,7 @@ struct xfs_da_geometry {
 	unsigned int	free_hdr_size;	/* dir2 free header size */
 	unsigned int	free_max_bests;	/* # of bests entries in dir2 free */
 	xfs_dablk_t	freeblk;	/* blockno of free data v2 */
+	xfs_extnum_t	max_extents;	/* Max. extents in corresponding fork */
 
 	xfs_dir2_data_aoff_t data_first_offset;
 	size_t		data_entry_offset;
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 5a49caa5c9df..95354b7ab7f5 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -277,6 +277,7 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
  * Directory address space divided into sections,
  * spaces separated by 32GB.
  */
+#define	XFS_DIR2_MAX_SPACES	3
 #define	XFS_DIR2_SPACE_SIZE	(1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
 #define	XFS_DIR2_DATA_SPACE	0
 #define	XFS_DIR2_DATA_OFFSET	(XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 5f1e4799e8fa..3cd51fa3837b 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -150,6 +150,8 @@ xfs_da_mount(
 	dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET);
 	dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) /
 				(uint)sizeof(xfs_da_node_entry_t);
+	dageo->max_extents = (XFS_DIR2_MAX_SPACES * XFS_DIR2_SPACE_SIZE) >>
+					mp->m_sb.sb_blocklog;
 	dageo->magicpct = (dageo->blksize * 37) / 100;
 
 	/* set up attribute geometry - single fsb only */
@@ -161,6 +163,12 @@ xfs_da_mount(
 	dageo->node_hdr_size = mp->m_dir_geo->node_hdr_size;
 	dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) /
 				(uint)sizeof(xfs_da_node_entry_t);
+
+	if (xfs_has_large_extent_counts(mp))
+		dageo->max_extents = XFS_MAX_EXTCNT_ATTR_FORK_LARGE;
+	else
+		dageo->max_extents = XFS_MAX_EXTCNT_ATTR_FORK_SMALL;
+
 	dageo->magicpct = (dageo->blksize * 37) / 100;
 	return 0;
 }
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 82b404c99b80..43de892d0305 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -915,6 +915,19 @@ enum xfs_dinode_fmt {
  *
  * Rounding up 47 to the nearest multiple of bits-per-byte results in 48. Hence
  * 2^48 was chosen as the maximum data fork extent count.
+ *
+ * The maximum file size that can be represented by the data fork extent counter
+ * in the worst case occurs when all extents are 1 block in length and each
+ * block is 1KB in size.
+ *
+ * With XFS_MAX_EXTCNT_DATA_FORK_SMALL representing maximum extent count and
+ * with 1KB sized blocks, a file can reach upto,
+ * 1KB * (2^31) = 2TB
+ *
+ * This is much larger than the theoretical maximum size of a directory
+ * i.e. XFS_DIR2_SPACE_SIZE * XFS_DIR2_MAX_SPACES = ~96GB.
+ *
+ * Hence, a directory inode can never overflow its data fork extent counter.
  */
 #define XFS_MAX_EXTCNT_DATA_FORK_LARGE	((xfs_extnum_t)((1ULL << 48) - 1))
 #define XFS_MAX_EXTCNT_ATTR_FORK_LARGE	((xfs_extnum_t)((1ULL << 32) - 1))
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index ee8d4eb7d048..74b82ec80f8e 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -491,6 +491,9 @@ xfs_dinode_verify(
 	if (mode && nextents + naextents > nblocks)
 		return __this_address;
 
+	if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
+		return __this_address;
+
 	if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
 		return __this_address;
 
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index fd5c3c2d77e0..6f9d69f8896e 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -39,19 +39,6 @@ struct xfs_ifork {
  */
 #define XFS_IEXT_PUNCH_HOLE_CNT		(1)
 
-/*
- * Directory entry addition can cause the following,
- * 1. Data block can be added/removed.
- *    A new extent can cause extent count to increase by 1.
- * 2. Free disk block can be added/removed.
- *    Same behaviour as described above for Data block.
- * 3. Dabtree blocks.
- *    XFS_DA_NODE_MAXDEPTH blocks can be added. Each of these can be new
- *    extents. Hence extent count can increase by XFS_DA_NODE_MAXDEPTH.
- */
-#define XFS_IEXT_DIR_MANIP_CNT(mp) \
-	((XFS_DA_NODE_MAXDEPTH + 1 + 1) * (mp)->m_dir_geo->fsbcount)
-
 /*
  * Adding/removing an xattr can cause XFS_DA_NODE_MAXDEPTH extents to
  * be added. One extra extent for dabtree in case a local attr is
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index adc1355ce853..20f15a0393e1 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1024,11 +1024,6 @@ xfs_create(
 	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
 	unlock_dp_on_error = true;
 
-	error = xfs_iext_count_may_overflow(dp, XFS_DATA_FORK,
-			XFS_IEXT_DIR_MANIP_CNT(mp));
-	if (error)
-		goto out_trans_cancel;
-
 	/*
 	 * A newly created regular or special file just has one directory
 	 * entry pointing to them, but a directory also the "." entry
@@ -1242,11 +1237,6 @@ xfs_link(
 	if (error)
 		goto std_return;
 
-	error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK,
-			XFS_IEXT_DIR_MANIP_CNT(mp));
-	if (error)
-		goto error_return;
-
 	/*
 	 * If we are using project inheritance, we only allow hard link
 	 * creation in our tree when the project IDs are the same; else
@@ -3210,35 +3200,6 @@ xfs_rename(
 	/*
 	 * Check for expected errors before we dirty the transaction
 	 * so we can return an error without a transaction abort.
-	 *
-	 * Extent count overflow check:
-	 *
-	 * From the perspective of src_dp, a rename operation is essentially a
-	 * directory entry remove operation. Hence the only place where we check
-	 * for extent count overflow for src_dp is in
-	 * xfs_bmap_del_extent_real(). xfs_bmap_del_extent_real() returns
-	 * -ENOSPC when it detects a possible extent count overflow and in
-	 * response, the higher layers of directory handling code do the
-	 * following:
-	 * 1. Data/Free blocks: XFS lets these blocks linger until a
-	 *    future remove operation removes them.
-	 * 2. Dabtree blocks: XFS swaps the blocks with the last block in the
-	 *    Leaf space and unmaps the last block.
-	 *
-	 * For target_dp, there are two cases depending on whether the
-	 * destination directory entry exists or not.
-	 *
-	 * When destination directory entry does not exist (i.e. target_ip ==
-	 * NULL), extent count overflow check is performed only when transaction
-	 * has a non-zero sized space reservation associated with it.  With a
-	 * zero-sized space reservation, XFS allows a rename operation to
-	 * continue only when the directory has sufficient free space in its
-	 * data/leaf/free space blocks to hold the new entry.
-	 *
-	 * When destination directory entry exists (i.e. target_ip != NULL), all
-	 * we need to do is change the inode number associated with the already
-	 * existing entry. Hence there is no need to perform an extent count
-	 * overflow check.
 	 */
 	if (target_ip == NULL) {
 		/*
@@ -3249,12 +3210,6 @@ xfs_rename(
 			error = xfs_dir_canenter(tp, target_dp, target_name);
 			if (error)
 				goto out_trans_cancel;
-		} else {
-			error = xfs_iext_count_may_overflow(target_dp,
-					XFS_DATA_FORK,
-					XFS_IEXT_DIR_MANIP_CNT(mp));
-			if (error)
-				goto out_trans_cancel;
 		}
 	} else {
 		/*
@@ -3422,18 +3377,12 @@ xfs_rename(
 	 * inode number of the whiteout inode rather than removing it
 	 * altogether.
 	 */
-	if (wip) {
+	if (wip)
 		error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
 					spaceres);
-	} else {
-		/*
-		 * NOTE: We don't need to check for extent count overflow here
-		 * because the dir remove name code will leave the dir block in
-		 * place if the extent count would overflow.
-		 */
+	else
 		error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
 					   spaceres);
-	}
 
 	if (error)
 		goto out_trans_cancel;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index affbedf78160..4145ba872547 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -226,11 +226,6 @@ xfs_symlink(
 		goto out_trans_cancel;
 	}
 
-	error = xfs_iext_count_may_overflow(dp, XFS_DATA_FORK,
-			XFS_IEXT_DIR_MANIP_CNT(mp));
-	if (error)
-		goto out_trans_cancel;
-
 	/*
 	 * Allocate an inode for the symlink.
 	 */
-- 
2.30.2


  parent reply	other threads:[~2022-04-12 14:03 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-06  6:18 [PATCH V9 00/19] xfs: Extend per-inode extent counters Chandan Babu R
2022-04-06  6:18 ` [PATCH V9 01/19] xfs: Move extent count limits to xfs_format.h Chandan Babu R
2022-04-06  6:18 ` [PATCH V9 02/19] xfs: Define max extent length based on on-disk format definition Chandan Babu R
2022-04-06  6:18 ` [PATCH V9 03/19] xfs: Introduce xfs_iext_max_nextents() helper Chandan Babu R
2022-04-06  6:18 ` [PATCH V9 04/19] xfs: Use xfs_extnum_t instead of basic data types Chandan Babu R
2022-04-06  6:18 ` [PATCH V9 05/19] xfs: Introduce xfs_dfork_nextents() helper Chandan Babu R
2022-04-06  6:18 ` [PATCH V9 06/19] xfs: Use basic types to define xfs_log_dinode's di_nextents and di_anextents Chandan Babu R
2022-04-06  6:18 ` [PATCH V9 07/19] xfs: Promote xfs_extnum_t and xfs_aextnum_t to 64 and 32-bits respectively Chandan Babu R
2022-04-06  6:18 ` [PATCH V9 08/19] xfs: Introduce XFS_SB_FEAT_INCOMPAT_NREXT64 and associated per-fs feature bit Chandan Babu R
2022-04-07  0:50   ` Dave Chinner
2022-04-06  6:18 ` [PATCH V9 09/19] xfs: Introduce XFS_FSOP_GEOM_FLAGS_NREXT64 Chandan Babu R
2022-04-06  6:18 ` [PATCH V9 10/19] xfs: Introduce XFS_DIFLAG2_NREXT64 and associated helpers Chandan Babu R
2022-04-06  6:18 ` [PATCH V9 11/19] xfs: Use uint64_t to count maximum blocks that can be used by BMBT Chandan Babu R
2022-04-07  0:52   ` Dave Chinner
2022-04-06  6:18 ` [PATCH V9 12/19] xfs: Introduce macros to represent new maximum extent counts for data/attr forks Chandan Babu R
2022-04-07  1:05   ` Dave Chinner
2022-04-07  1:58     ` Darrick J. Wong
2022-04-07  2:44       ` Dave Chinner
2022-04-07  8:18         ` Chandan Babu R
2022-04-07  8:56           ` Dave Chinner
2022-04-07  8:18       ` Chandan Babu R
2022-04-06  6:18 ` [PATCH V9 13/19] xfs: Replace numbered inode recovery error messages with descriptive ones Chandan Babu R
2022-04-07  1:50   ` Darrick J. Wong
2022-04-06  6:18 ` [PATCH V9 14/19] xfs: Introduce per-inode 64-bit extent counters Chandan Babu R
2022-04-06 19:03   ` kernel test robot
2022-04-07  1:07     ` Dave Chinner
2022-04-07  1:07       ` Dave Chinner
2022-04-07  8:18       ` Chandan Babu R
2022-04-07  8:18         ` Chandan Babu R
2022-04-06  6:18 ` [PATCH V9 15/19] xfs: Directory's data fork extent counter can never overflow Chandan Babu R
2022-04-07  1:13   ` Dave Chinner
2022-04-07  1:48     ` Darrick J. Wong
2022-04-07  8:19       ` Chandan Babu R
2022-04-09 13:47   ` [PATCH V9.1] " Chandan Babu R
2022-04-11  1:33     ` Dave Chinner
2022-04-11 22:07     ` Darrick J. Wong
2022-04-12  3:39       ` Chandan Babu R
2022-04-12 14:02   ` Chandan Babu R [this message]
2022-04-12 17:04     ` [PATCH V9.2] " Darrick J. Wong
2022-04-06  6:19 ` [PATCH V9 16/19] xfs: Conditionally upgrade existing inodes to use large extent counters Chandan Babu R
2022-04-07  1:22   ` Dave Chinner
2022-04-07  1:46     ` Darrick J. Wong
2022-04-07  8:19     ` Chandan Babu R
2022-04-07  1:46   ` Darrick J. Wong
2022-04-07  2:00     ` Darrick J. Wong
2022-04-07  8:19     ` Chandan Babu R
2022-04-09 13:52   ` [PATCH V9.1] " Chandan Babu R
2022-04-11  1:34     ` Dave Chinner
2022-04-06  6:19 ` [PATCH V9 17/19] xfs: Decouple XFS_IBULK flags from XFS_IWALK flags Chandan Babu R
2022-04-07  1:29   ` Darrick J. Wong
2022-04-06  6:19 ` [PATCH V9 18/19] xfs: Enable bulkstat ioctl to support 64-bit per-inode extent counters Chandan Babu R
2022-04-07  1:29   ` Darrick J. Wong
2022-04-07  1:42     ` Dave Chinner
2022-04-07  8:20     ` Chandan Babu R
2022-04-09 13:57   ` [PATCH V9.1] " Chandan Babu R
2022-04-11  2:56     ` Dave Chinner
2022-04-13  2:57     ` Darrick J. Wong
2022-04-13  7:48       ` Chandan Babu R
2022-04-06  6:19 ` [PATCH V9 19/19] xfs: Add XFS_SB_FEAT_INCOMPAT_NREXT64 to the list of supported flags Chandan Babu R
2022-04-07  1:23   ` Dave Chinner
2022-04-07  1:26   ` Darrick J. Wong
2022-04-09 13:23 ` [PATCH V9.1] xfs: Introduce macros to represent new maximum extent counts for data/attr forks Chandan Babu R

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220412140237.1759506-1-chandan.babu@oracle.com \
    --to=chandan.babu@oracle.com \
    --cc=david@fromorbit.com \
    --cc=djwong@kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.