linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V2 0/5] xfs: speed up large directory modifications
@ 2019-08-29  6:30 Dave Chinner
  2019-08-29  6:30 ` [PATCH 1/5] xfs: move xfs_dir2_addname() Dave Chinner
                   ` (4 more replies)
  0 siblings, 5 replies; 25+ messages in thread
From: Dave Chinner @ 2019-08-29  6:30 UTC (permalink / raw)
  To: linux-xfs

Hi folks,

After a long time I've managed to get back to these directory
speedup patches, originally posted here:

https://lore.kernel.org/linux-xfs/20181024225716.19459-1-david@fromorbit.com/

I've addressed all of Christoph's original issues, incorporated his
suggestions, updated the benchmark results (same/slightly better
improvement) and done more testing on it. The series has been in my
test tree since I posted it ~9 months ago and has been in all my
benchmarking work over that time. I haven't seen any performance
regression as a result of the change of algorithm, but there are a
few that go a lot faster....

Comments welcome.

-Dave.



^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 1/5] xfs: move xfs_dir2_addname()
  2019-08-29  6:30 [PATCH V2 0/5] xfs: speed up large directory modifications Dave Chinner
@ 2019-08-29  6:30 ` Dave Chinner
  2019-08-29  7:59   ` Christoph Hellwig
  2019-08-29  6:30 ` [PATCH 2/5] xfs: factor data block addition from xfs_dir2_node_addname_int() Dave Chinner
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 25+ messages in thread
From: Dave Chinner @ 2019-08-29  6:30 UTC (permalink / raw)
  To: linux-xfs

From: Dave Chinner <dchinner@redhat.com>

This gets rid of the need for a forward  declaration of the static
function xfs_dir2_addname_int() and readies the code for factoring
of xfs_dir2_addname_int().

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_dir2_node.c | 140 +++++++++++++++++-----------------
 1 file changed, 69 insertions(+), 71 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 1fc44efc344d..e40986cc0759 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -32,8 +32,6 @@ static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state,
 static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp,
 				 int index, xfs_da_state_blk_t *dblk,
 				 int *rval);
-static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
-				     xfs_da_state_blk_t *fblk);
 
 /*
  * Check internal consistency of a leafn block.
@@ -1610,75 +1608,6 @@ xfs_dir2_leafn_unbalance(
 	xfs_dir3_leaf_check(dp, drop_blk->bp);
 }
 
-/*
- * Top-level node form directory addname routine.
- */
-int						/* error */
-xfs_dir2_node_addname(
-	xfs_da_args_t		*args)		/* operation arguments */
-{
-	xfs_da_state_blk_t	*blk;		/* leaf block for insert */
-	int			error;		/* error return value */
-	int			rval;		/* sub-return value */
-	xfs_da_state_t		*state;		/* btree cursor */
-
-	trace_xfs_dir2_node_addname(args);
-
-	/*
-	 * Allocate and initialize the state (btree cursor).
-	 */
-	state = xfs_da_state_alloc();
-	state->args = args;
-	state->mp = args->dp->i_mount;
-	/*
-	 * Look up the name.  We're not supposed to find it, but
-	 * this gives us the insertion point.
-	 */
-	error = xfs_da3_node_lookup_int(state, &rval);
-	if (error)
-		rval = error;
-	if (rval != -ENOENT) {
-		goto done;
-	}
-	/*
-	 * Add the data entry to a data block.
-	 * Extravalid is set to a freeblock found by lookup.
-	 */
-	rval = xfs_dir2_node_addname_int(args,
-		state->extravalid ? &state->extrablk : NULL);
-	if (rval) {
-		goto done;
-	}
-	blk = &state->path.blk[state->path.active - 1];
-	ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
-	/*
-	 * Add the new leaf entry.
-	 */
-	rval = xfs_dir2_leafn_add(blk->bp, args, blk->index);
-	if (rval == 0) {
-		/*
-		 * It worked, fix the hash values up the btree.
-		 */
-		if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
-			xfs_da3_fixhashpath(state, &state->path);
-	} else {
-		/*
-		 * It didn't work, we need to split the leaf block.
-		 */
-		if (args->total == 0) {
-			ASSERT(rval == -ENOSPC);
-			goto done;
-		}
-		/*
-		 * Split the leaf block and insert the new entry.
-		 */
-		rval = xfs_da3_split(state);
-	}
-done:
-	xfs_da_state_free(state);
-	return rval;
-}
-
 /*
  * Add the data entry for a node-format directory name addition.
  * The leaf entry is added in xfs_dir2_leafn_add.
@@ -2056,6 +1985,75 @@ xfs_dir2_node_addname_int(
 	return 0;
 }
 
+/*
+ * Top-level node form directory addname routine.
+ */
+int						/* error */
+xfs_dir2_node_addname(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	xfs_da_state_blk_t	*blk;		/* leaf block for insert */
+	int			error;		/* error return value */
+	int			rval;		/* sub-return value */
+	xfs_da_state_t		*state;		/* btree cursor */
+
+	trace_xfs_dir2_node_addname(args);
+
+	/*
+	 * Allocate and initialize the state (btree cursor).
+	 */
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	/*
+	 * Look up the name.  We're not supposed to find it, but
+	 * this gives us the insertion point.
+	 */
+	error = xfs_da3_node_lookup_int(state, &rval);
+	if (error)
+		rval = error;
+	if (rval != -ENOENT) {
+		goto done;
+	}
+	/*
+	 * Add the data entry to a data block.
+	 * Extravalid is set to a freeblock found by lookup.
+	 */
+	rval = xfs_dir2_node_addname_int(args,
+		state->extravalid ? &state->extrablk : NULL);
+	if (rval) {
+		goto done;
+	}
+	blk = &state->path.blk[state->path.active - 1];
+	ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
+	/*
+	 * Add the new leaf entry.
+	 */
+	rval = xfs_dir2_leafn_add(blk->bp, args, blk->index);
+	if (rval == 0) {
+		/*
+		 * It worked, fix the hash values up the btree.
+		 */
+		if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
+			xfs_da3_fixhashpath(state, &state->path);
+	} else {
+		/*
+		 * It didn't work, we need to split the leaf block.
+		 */
+		if (args->total == 0) {
+			ASSERT(rval == -ENOSPC);
+			goto done;
+		}
+		/*
+		 * Split the leaf block and insert the new entry.
+		 */
+		rval = xfs_da3_split(state);
+	}
+done:
+	xfs_da_state_free(state);
+	return rval;
+}
+
 /*
  * Lookup an entry in a node-format directory.
  * All the real work happens in xfs_da3_node_lookup_int.
-- 
2.23.0.rc1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 2/5] xfs: factor data block addition from xfs_dir2_node_addname_int()
  2019-08-29  6:30 [PATCH V2 0/5] xfs: speed up large directory modifications Dave Chinner
  2019-08-29  6:30 ` [PATCH 1/5] xfs: move xfs_dir2_addname() Dave Chinner
@ 2019-08-29  6:30 ` Dave Chinner
  2019-08-29  8:05   ` Christoph Hellwig
  2019-08-29  6:30 ` [PATCH 3/5] xfs: factor free block index lookup " Dave Chinner
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 25+ messages in thread
From: Dave Chinner @ 2019-08-29  6:30 UTC (permalink / raw)
  To: linux-xfs

From: Dave Chinner <dchinner@redhat.com>

Factor out the code that adds a data block to a directory from
xfs_dir2_node_addname_int(). This makes the code flow cleaner and
more obvious and provides clear isolation of upcoming optimsations.

Signed-Off-By: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_dir2_node.c | 325 +++++++++++++++++-----------------
 1 file changed, 159 insertions(+), 166 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index e40986cc0759..747d10692bb6 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -1608,6 +1608,130 @@ xfs_dir2_leafn_unbalance(
 	xfs_dir3_leaf_check(dp, drop_blk->bp);
 }
 
+/*
+ * Add a new data block to the directory at the free space index that the caller
+ * has specified.
+ */
+static int
+xfs_dir2_node_add_datablk(
+	struct xfs_da_args	*args,
+	struct xfs_da_state_blk	*fblk,
+	xfs_dir2_db_t		*dbno,
+	struct xfs_buf		**dbpp,
+	struct xfs_buf		**fbpp,
+	int			*findex)
+{
+	struct xfs_inode	*dp = args->dp;
+	struct xfs_trans	*tp = args->trans;
+	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_dir3_icfree_hdr freehdr;
+	struct xfs_dir2_data_free *bf;
+	struct xfs_dir2_data_hdr *hdr;
+	struct xfs_dir2_free	*free = NULL;
+	xfs_dir2_db_t		fbno;
+	struct xfs_buf		*fbp;
+	struct xfs_buf		*dbp;
+	__be16			*bests = NULL;
+	int			error;
+
+	/* Not allowed to allocate, return failure. */
+	if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
+		return -ENOSPC;
+
+	/* Allocate and initialize the new data block.  */
+	error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, dbno);
+	if (error)
+		return error;
+	error = xfs_dir3_data_init(args, *dbno, &dbp);
+	if (error)
+		return error;
+
+	/*
+	 * Get the freespace block corresponding to the data block
+	 * that was just allocated.
+	 */
+	fbno = dp->d_ops->db_to_fdb(args->geo, *dbno);
+	error = xfs_dir2_free_try_read(tp, dp,
+			       xfs_dir2_db_to_da(args->geo, fbno), &fbp);
+	if (error)
+		return error;
+
+	/*
+	 * If there wasn't a freespace block, the read will
+	 * return a NULL fbp.  Allocate and initialize a new one.
+	 */
+	if (!fbp) {
+		error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fbno);
+		if (error)
+			return error;
+
+		if (dp->d_ops->db_to_fdb(args->geo, *dbno) != fbno) {
+			xfs_alert(mp,
+"%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld",
+				__func__, (unsigned long long)dp->i_ino,
+				(long long)dp->d_ops->db_to_fdb(args->geo, *dbno),
+				(long long)*dbno, (long long)fbno);
+			if (fblk) {
+				xfs_alert(mp,
+			" fblk "PTR_FMT" blkno %llu index %d magic 0x%x",
+					fblk, (unsigned long long)fblk->blkno,
+					fblk->index, fblk->magic);
+			} else {
+				xfs_alert(mp, " ... fblk is NULL");
+			}
+			XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
+					 XFS_ERRLEVEL_LOW, mp);
+			return -EFSCORRUPTED;
+		}
+
+		/* Get a buffer for the new block. */
+		error = xfs_dir3_free_get_buf(args, fbno, &fbp);
+		if (error)
+			return error;
+		free = fbp->b_addr;
+		bests = dp->d_ops->free_bests_p(free);
+		dp->d_ops->free_hdr_from_disk(&freehdr, free);
+
+		/* Remember the first slot as our empty slot. */
+		freehdr.firstdb = (fbno - xfs_dir2_byte_to_db(args->geo,
+							XFS_DIR2_FREE_OFFSET)) *
+				dp->d_ops->free_max_bests(args->geo);
+	} else {
+		free = fbp->b_addr;
+		bests = dp->d_ops->free_bests_p(free);
+		dp->d_ops->free_hdr_from_disk(&freehdr, free);
+	}
+
+	/* Set the freespace block index from the data block number. */
+	*findex = dp->d_ops->db_to_fdindex(args->geo, *dbno);
+
+	/* Extend the freespace table if the new data block is off the end. */
+	if (*findex >= freehdr.nvalid) {
+		ASSERT(*findex < dp->d_ops->free_max_bests(args->geo));
+		freehdr.nvalid = *findex + 1;
+		bests[*findex] = cpu_to_be16(NULLDATAOFF);
+	}
+
+	/*
+	 * If this entry was for an empty data block (this should always be
+	 * true) then update the header.
+	 */
+	if (bests[*findex] == cpu_to_be16(NULLDATAOFF)) {
+		freehdr.nused++;
+		dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
+		xfs_dir2_free_log_header(args, fbp);
+	}
+
+	/* Update the freespace value for the new block in the table. */
+	hdr = dbp->b_addr;
+	bf = dp->d_ops->data_bestfree_p(hdr);
+	bests[*findex] = bf[0].length;
+
+	*dbpp = dbp;
+	*fbpp = fbp;
+	return 0;
+}
+
 /*
  * Add the data entry for a node-format directory name addition.
  * The leaf entry is added in xfs_dir2_leafn_add.
@@ -1632,10 +1756,9 @@ xfs_dir2_node_addname_int(
 	xfs_dir2_db_t		ifbno;		/* initial freespace block no */
 	xfs_dir2_db_t		lastfbno=0;	/* highest freespace block no */
 	int			length;		/* length of the new entry */
-	int			logfree;	/* need to log free entry */
-	xfs_mount_t		*mp;		/* filesystem mount point */
-	int			needlog;	/* need to log data header */
-	int			needscan;	/* need to rescan data frees */
+	int			logfree = 0;	/* need to log free entry */
+	int			needlog = 0;	/* need to log data header */
+	int			needscan = 0;	/* need to rescan data frees */
 	__be16			*tagp;		/* data entry tag pointer */
 	xfs_trans_t		*tp;		/* transaction pointer */
 	__be16			*bests;
@@ -1644,7 +1767,6 @@ xfs_dir2_node_addname_int(
 	xfs_dir2_data_aoff_t	aoff;
 
 	dp = args->dp;
-	mp = dp->i_mount;
 	tp = args->trans;
 	length = dp->d_ops->data_entsize(args->namelen);
 	/*
@@ -1673,6 +1795,7 @@ xfs_dir2_node_addname_int(
 			ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF);
 			ASSERT(be16_to_cpu(bests[findex]) >= length);
 			dbno = freehdr.firstdb + findex;
+			goto found_block;
 		} else {
 			/*
 			 * The data block looked at didn't have enough room.
@@ -1774,168 +1897,46 @@ xfs_dir2_node_addname_int(
 			}
 		}
 	}
+
 	/*
 	 * If we don't have a data block, we need to allocate one and make
 	 * the freespace entries refer to it.
 	 */
-	if (unlikely(dbno == -1)) {
-		/*
-		 * Not allowed to allocate, return failure.
-		 */
-		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
-			return -ENOSPC;
-
-		/*
-		 * Allocate and initialize the new data block.
-		 */
-		if (unlikely((error = xfs_dir2_grow_inode(args,
-							 XFS_DIR2_DATA_SPACE,
-							 &dbno)) ||
-		    (error = xfs_dir3_data_init(args, dbno, &dbp))))
-			return error;
-
-		/*
-		 * If (somehow) we have a freespace block, get rid of it.
-		 */
-		if (fbp)
-			xfs_trans_brelse(tp, fbp);
-		if (fblk && fblk->bp)
-			fblk->bp = NULL;
-
-		/*
-		 * Get the freespace block corresponding to the data block
-		 * that was just allocated.
-		 */
-		fbno = dp->d_ops->db_to_fdb(args->geo, dbno);
-		error = xfs_dir2_free_try_read(tp, dp,
-				       xfs_dir2_db_to_da(args->geo, fbno),
-				       &fbp);
+	if (dbno == -1) {
+		error = xfs_dir2_node_add_datablk(args, fblk, &dbno, &dbp, &fbp,
+						  &findex);
 		if (error)
 			return error;
 
-		/*
-		 * If there wasn't a freespace block, the read will
-		 * return a NULL fbp.  Allocate and initialize a new one.
-		 */
-		if (!fbp) {
-			error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE,
-						    &fbno);
-			if (error)
-				return error;
-
-			if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) {
-				xfs_alert(mp,
-"%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld ifbno %llu lastfbno %d",
-					__func__, (unsigned long long)dp->i_ino,
-					(long long)dp->d_ops->db_to_fdb(
-								args->geo, dbno),
-					(long long)dbno, (long long)fbno,
-					(unsigned long long)ifbno, lastfbno);
-				if (fblk) {
-					xfs_alert(mp,
-				" fblk "PTR_FMT" blkno %llu index %d magic 0x%x",
-						fblk,
-						(unsigned long long)fblk->blkno,
-						fblk->index,
-						fblk->magic);
-				} else {
-					xfs_alert(mp, " ... fblk is NULL");
-				}
-				XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
-						 XFS_ERRLEVEL_LOW, mp);
-				return -EFSCORRUPTED;
-			}
-
-			/*
-			 * Get a buffer for the new block.
-			 */
-			error = xfs_dir3_free_get_buf(args, fbno, &fbp);
-			if (error)
-				return error;
-			free = fbp->b_addr;
-			bests = dp->d_ops->free_bests_p(free);
-			dp->d_ops->free_hdr_from_disk(&freehdr, free);
-
-			/*
-			 * Remember the first slot as our empty slot.
-			 */
-			freehdr.firstdb =
-				(fbno - xfs_dir2_byte_to_db(args->geo,
-							XFS_DIR2_FREE_OFFSET)) *
-					dp->d_ops->free_max_bests(args->geo);
-		} else {
-			free = fbp->b_addr;
-			bests = dp->d_ops->free_bests_p(free);
-			dp->d_ops->free_hdr_from_disk(&freehdr, free);
-		}
+		/* setup current free block buffer */
+		free = fbp->b_addr;
 
-		/*
-		 * Set the freespace block index from the data block number.
-		 */
-		findex = dp->d_ops->db_to_fdindex(args->geo, dbno);
-		/*
-		 * If it's after the end of the current entries in the
-		 * freespace block, extend that table.
-		 */
-		if (findex >= freehdr.nvalid) {
-			ASSERT(findex < dp->d_ops->free_max_bests(args->geo));
-			freehdr.nvalid = findex + 1;
-			/*
-			 * Tag new entry so nused will go up.
-			 */
-			bests[findex] = cpu_to_be16(NULLDATAOFF);
-		}
-		/*
-		 * If this entry was for an empty data block
-		 * (this should always be true) then update the header.
-		 */
-		if (bests[findex] == cpu_to_be16(NULLDATAOFF)) {
-			freehdr.nused++;
-			dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
-			xfs_dir2_free_log_header(args, fbp);
-		}
-		/*
-		 * Update the real value in the table.
-		 * We haven't allocated the data entry yet so this will
-		 * change again.
-		 */
-		hdr = dbp->b_addr;
-		bf = dp->d_ops->data_bestfree_p(hdr);
-		bests[findex] = bf[0].length;
+		/* we're going to have to log the free block index later */
 		logfree = 1;
-	}
-	/*
-	 * We had a data block so we don't have to make a new one.
-	 */
-	else {
-		/*
-		 * If just checking, we succeeded.
-		 */
+	} else {
+found_block:
+		/* If just checking, we succeeded. */
 		if (args->op_flags & XFS_DA_OP_JUSTCHECK)
 			return 0;
 
-		/*
-		 * Read the data block in.
-		 */
+		/* Read the data block in. */
 		error = xfs_dir3_data_read(tp, dp,
 					   xfs_dir2_db_to_da(args->geo, dbno),
 					   -1, &dbp);
 		if (error)
 			return error;
-		hdr = dbp->b_addr;
-		bf = dp->d_ops->data_bestfree_p(hdr);
-		logfree = 0;
 	}
+
+	/* setup for data block up now */
+	hdr = dbp->b_addr;
+	bf = dp->d_ops->data_bestfree_p(hdr);
 	ASSERT(be16_to_cpu(bf[0].length) >= length);
-	/*
-	 * Point to the existing unused space.
-	 */
+
+	/* Point to the existing unused space. */
 	dup = (xfs_dir2_data_unused_t *)
 	      ((char *)hdr + be16_to_cpu(bf[0].offset));
-	needscan = needlog = 0;
-	/*
-	 * Mark the first part of the unused space, inuse for us.
-	 */
+
+	/* Mark the first part of the unused space, inuse for us. */
 	aoff = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
 	error = xfs_dir2_data_use_free(args, dbp, dup, aoff, length,
 			&needlog, &needscan);
@@ -1943,9 +1944,8 @@ xfs_dir2_node_addname_int(
 		xfs_trans_brelse(tp, dbp);
 		return error;
 	}
-	/*
-	 * Fill in the new entry and log it.
-	 */
+
+	/* Fill in the new entry and log it. */
 	dep = (xfs_dir2_data_entry_t *)dup;
 	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
@@ -1954,32 +1954,25 @@ xfs_dir2_node_addname_int(
 	tagp = dp->d_ops->data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
 	xfs_dir2_data_log_entry(args, dbp, dep);
-	/*
-	 * Rescan the block for bestfree if needed.
-	 */
+
+	/* Rescan the freespace and log the data block if needed. */
 	if (needscan)
 		xfs_dir2_data_freescan(dp, hdr, &needlog);
-	/*
-	 * Log the data block header if needed.
-	 */
 	if (needlog)
 		xfs_dir2_data_log_header(args, dbp);
-	/*
-	 * If the freespace entry is now wrong, update it.
-	 */
-	bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */
-	if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) {
+
+	/* If the freespace block entry is now wrong, update it. */
+	bests = dp->d_ops->free_bests_p(free);
+	if (bests[findex] != bf[0].length) {
 		bests[findex] = bf[0].length;
 		logfree = 1;
 	}
-	/*
-	 * Log the freespace entry if needed.
-	 */
+
+	/* Log the freespace entry if needed. */
 	if (logfree)
 		xfs_dir2_free_log_bests(args, fbp, findex, findex);
-	/*
-	 * Return the data block and offset in args, then drop the data block.
-	 */
+
+	/* Return the data block and offset in args. */
 	args->blkno = (xfs_dablk_t)dbno;
 	args->index = be16_to_cpu(*tagp);
 	return 0;
-- 
2.23.0.rc1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 3/5] xfs: factor free block index lookup from xfs_dir2_node_addname_int()
  2019-08-29  6:30 [PATCH V2 0/5] xfs: speed up large directory modifications Dave Chinner
  2019-08-29  6:30 ` [PATCH 1/5] xfs: move xfs_dir2_addname() Dave Chinner
  2019-08-29  6:30 ` [PATCH 2/5] xfs: factor data block addition from xfs_dir2_node_addname_int() Dave Chinner
@ 2019-08-29  6:30 ` Dave Chinner
  2019-08-29  8:10   ` Christoph Hellwig
  2019-08-29  6:30 ` [PATCH 4/5] xfs: speed up directory bestfree block scanning Dave Chinner
  2019-08-29  6:30 ` [PATCH 5/5] xfs: reverse search directory freespace indexes Dave Chinner
  4 siblings, 1 reply; 25+ messages in thread
From: Dave Chinner @ 2019-08-29  6:30 UTC (permalink / raw)
  To: linux-xfs

From: Dave Chinner <dchinner@redhat.com>

Simplify the logic in xfs_dir2_node_addname_int() by factoring out
the free block index lookup code that finds a block with enough free
space for the entry to be added. The code that is moved gets a major
cleanup at the same time, but there is no algorithm change here.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_dir2_node.c | 196 ++++++++++++++++++----------------
 1 file changed, 103 insertions(+), 93 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 747d10692bb6..1d3d1c9b5961 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -1635,7 +1635,7 @@ xfs_dir2_node_add_datablk(
 	int			error;
 
 	/* Not allowed to allocate, return failure. */
-	if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
+	if (args->total == 0)
 		return -ENOSPC;
 
 	/* Allocate and initialize the new data block.  */
@@ -1732,43 +1732,29 @@ xfs_dir2_node_add_datablk(
 	return 0;
 }
 
-/*
- * Add the data entry for a node-format directory name addition.
- * The leaf entry is added in xfs_dir2_leafn_add.
- * We may enter with a freespace block that the lookup found.
- */
-static int					/* error */
-xfs_dir2_node_addname_int(
-	xfs_da_args_t		*args,		/* operation arguments */
-	xfs_da_state_blk_t	*fblk)		/* optional freespace block */
+static int
+xfs_dir2_node_find_freeblk(
+	struct xfs_da_args	*args,
+	struct xfs_da_state_blk	*fblk,
+	xfs_dir2_db_t		*dbnop,
+	struct xfs_buf		**fbpp,
+	int			*findexp,
+	int			length)
 {
-	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
-	xfs_dir2_db_t		dbno;		/* data block number */
-	struct xfs_buf		*dbp;		/* data block buffer */
-	xfs_dir2_data_entry_t	*dep;		/* data entry pointer */
-	xfs_inode_t		*dp;		/* incore directory inode */
-	xfs_dir2_data_unused_t	*dup;		/* data unused entry pointer */
-	int			error;		/* error return value */
-	xfs_dir2_db_t		fbno;		/* freespace block number */
-	struct xfs_buf		*fbp;		/* freespace buffer */
-	int			findex;		/* freespace entry index */
-	xfs_dir2_free_t		*free=NULL;	/* freespace block structure */
-	xfs_dir2_db_t		ifbno;		/* initial freespace block no */
-	xfs_dir2_db_t		lastfbno=0;	/* highest freespace block no */
-	int			length;		/* length of the new entry */
-	int			logfree = 0;	/* need to log free entry */
-	int			needlog = 0;	/* need to log data header */
-	int			needscan = 0;	/* need to rescan data frees */
-	__be16			*tagp;		/* data entry tag pointer */
-	xfs_trans_t		*tp;		/* transaction pointer */
-	__be16			*bests;
 	struct xfs_dir3_icfree_hdr freehdr;
-	struct xfs_dir2_data_free *bf;
-	xfs_dir2_data_aoff_t	aoff;
+	struct xfs_dir2_free	*free = NULL;
+	struct xfs_inode	*dp = args->dp;
+	struct xfs_trans	*tp = args->trans;
+	struct xfs_buf		*fbp = NULL;
+	xfs_dir2_db_t		lastfbno;
+	xfs_dir2_db_t		ifbno = -1;
+	xfs_dir2_db_t		dbno = -1;
+	xfs_dir2_db_t		fbno = -1;
+	xfs_fileoff_t		fo;
+	__be16			*bests;
+	int			findex;
+	int			error;
 
-	dp = args->dp;
-	tp = args->trans;
-	length = dp->d_ops->data_entsize(args->namelen);
 	/*
 	 * If we came in with a freespace block that means that lookup
 	 * found an entry with our hash value.  This is the freespace
@@ -1776,56 +1762,44 @@ xfs_dir2_node_addname_int(
 	 */
 	if (fblk) {
 		fbp = fblk->bp;
-		/*
-		 * Remember initial freespace block number.
-		 */
-		ifbno = fblk->blkno;
 		free = fbp->b_addr;
 		findex = fblk->index;
-		bests = dp->d_ops->free_bests_p(free);
-		dp->d_ops->free_hdr_from_disk(&freehdr, free);
-
-		/*
-		 * This means the free entry showed that the data block had
-		 * space for our entry, so we remembered it.
-		 * Use that data block.
-		 */
 		if (findex >= 0) {
+			/* caller already found the freespace for us. */
+			bests = dp->d_ops->free_bests_p(free);
+			dp->d_ops->free_hdr_from_disk(&freehdr, free);
+
 			ASSERT(findex < freehdr.nvalid);
 			ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF);
 			ASSERT(be16_to_cpu(bests[findex]) >= length);
 			dbno = freehdr.firstdb + findex;
-			goto found_block;
-		} else {
-			/*
-			 * The data block looked at didn't have enough room.
-			 * We'll start at the beginning of the freespace entries.
-			 */
-			dbno = -1;
-			findex = 0;
+			goto out;
 		}
-	} else {
+
 		/*
-		 * Didn't come in with a freespace block, so no data block.
+		 * The data block looked at didn't have enough room.
+		 * We'll start at the beginning of the freespace entries.
 		 */
-		ifbno = dbno = -1;
-		fbp = NULL;
-		findex = 0;
+		ifbno = fblk->blkno;
+		fbno = ifbno;
 	}
+	ASSERT(dbno == -1);
+	findex = 0;
 
 	/*
-	 * If we don't have a data block yet, we're going to scan the
-	 * freespace blocks looking for one.  Figure out what the
-	 * highest freespace block number is.
+	 * If we don't have a data block yet, we're going to scan the freespace
+	 * blocks looking for one.  Figure out what the highest freespace block
+	 * number is.
 	 */
-	if (dbno == -1) {
-		xfs_fileoff_t	fo;		/* freespace block number */
+	error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK);
+	if (error)
+		return error;
+	lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);
+
+	/* If we haven't get a search start block, set it now */
+	if (fbno == -1)
+		fbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);
 
-		if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK)))
-			return error;
-		lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);
-		fbno = ifbno;
-	}
 	/*
 	 * While we haven't identified a data block, search the freeblock
 	 * data for a good data block.  If we find a null freeblock entry,
@@ -1836,17 +1810,10 @@ xfs_dir2_node_addname_int(
 		 * If we don't have a freeblock in hand, get the next one.
 		 */
 		if (fbp == NULL) {
-			/*
-			 * Happens the first time through unless lookup gave
-			 * us a freespace block to start with.
-			 */
-			if (++fbno == 0)
-				fbno = xfs_dir2_byte_to_db(args->geo,
-							XFS_DIR2_FREE_OFFSET);
 			/*
 			 * If it's ifbno we already looked at it.
 			 */
-			if (fbno == ifbno)
+			if (++fbno == ifbno)
 				fbno++;
 			/*
 			 * If it's off the end we're done.
@@ -1897,35 +1864,77 @@ xfs_dir2_node_addname_int(
 			}
 		}
 	}
+out:
+	*dbnop = dbno;
+	*fbpp = fbp;
+	*findexp = findex;
+	return 0;
+}
+
+
+/*
+ * Add the data entry for a node-format directory name addition.
+ * The leaf entry is added in xfs_dir2_leafn_add.
+ * We may enter with a freespace block that the lookup found.
+ */
+static int
+xfs_dir2_node_addname_int(
+	struct xfs_da_args	*args,		/* operation arguments */
+	struct xfs_da_state_blk	*fblk)		/* optional freespace block */
+{
+	struct xfs_dir2_data_unused *dup;	/* data unused entry pointer */
+	struct xfs_dir2_data_entry *dep;	/* data entry pointer */
+	struct xfs_dir2_data_hdr *hdr;		/* data block header */
+	struct xfs_dir2_data_free *bf;
+	struct xfs_dir2_free	*free = NULL;	/* freespace block structure */
+	struct xfs_trans	*tp = args->trans;
+	struct xfs_inode	*dp = args->dp;
+	struct xfs_buf		*dbp;		/* data block buffer */
+	struct xfs_buf		*fbp;		/* freespace buffer */
+	xfs_dir2_data_aoff_t	aoff;
+	xfs_dir2_db_t		dbno;		/* data block number */
+	int			error;		/* error return value */
+	int			findex;		/* freespace entry index */
+	int			length;		/* length of the new entry */
+	int			logfree = 0;	/* need to log free entry */
+	int			needlog = 0;	/* need to log data header */
+	int			needscan = 0;	/* need to rescan data frees */
+	__be16			*tagp;		/* data entry tag pointer */
+	__be16			*bests;
+
+	length = dp->d_ops->data_entsize(args->namelen);
+	error = xfs_dir2_node_find_freeblk(args, fblk, &dbno, &fbp, &findex,
+					   length);
+	if (error)
+		return error;
+
+	/*
+	 * Now we know if we must allocate blocks, so if we are checking whether
+	 * we can insert without allocation then we can return now.
+	 */
+	if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
+		if (dbno != -1)
+			return 0;
+		return -ENOSPC;
+	}
 
 	/*
 	 * If we don't have a data block, we need to allocate one and make
 	 * the freespace entries refer to it.
 	 */
 	if (dbno == -1) {
-		error = xfs_dir2_node_add_datablk(args, fblk, &dbno, &dbp, &fbp,
-						  &findex);
-		if (error)
-			return error;
-
-		/* setup current free block buffer */
-		free = fbp->b_addr;
-
 		/* we're going to have to log the free block index later */
 		logfree = 1;
+		error = xfs_dir2_node_add_datablk(args, fblk, &dbno, &dbp, &fbp,
+						  &findex);
 	} else {
-found_block:
-		/* If just checking, we succeeded. */
-		if (args->op_flags & XFS_DA_OP_JUSTCHECK)
-			return 0;
-
 		/* Read the data block in. */
 		error = xfs_dir3_data_read(tp, dp,
 					   xfs_dir2_db_to_da(args->geo, dbno),
 					   -1, &dbp);
-		if (error)
-			return error;
 	}
+	if (error)
+		return error;
 
 	/* setup for data block up now */
 	hdr = dbp->b_addr;
@@ -1962,6 +1971,7 @@ xfs_dir2_node_addname_int(
 		xfs_dir2_data_log_header(args, dbp);
 
 	/* If the freespace block entry is now wrong, update it. */
+	free = fbp->b_addr;
 	bests = dp->d_ops->free_bests_p(free);
 	if (bests[findex] != bf[0].length) {
 		bests[findex] = bf[0].length;
-- 
2.23.0.rc1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 4/5] xfs: speed up directory bestfree block scanning
  2019-08-29  6:30 [PATCH V2 0/5] xfs: speed up large directory modifications Dave Chinner
                   ` (2 preceding siblings ...)
  2019-08-29  6:30 ` [PATCH 3/5] xfs: factor free block index lookup " Dave Chinner
@ 2019-08-29  6:30 ` Dave Chinner
  2019-08-29  8:18   ` Christoph Hellwig
  2019-08-29  8:25   ` Christoph Hellwig
  2019-08-29  6:30 ` [PATCH 5/5] xfs: reverse search directory freespace indexes Dave Chinner
  4 siblings, 2 replies; 25+ messages in thread
From: Dave Chinner @ 2019-08-29  6:30 UTC (permalink / raw)
  To: linux-xfs

From: Dave Chinner <dchinner@redhat.com>

When running a "create millions inodes in a directory" test
recently, I noticed we were spending a huge amount of time
converting freespace block headers from disk format to in-memory
format:

 31.47%  [kernel]  [k] xfs_dir2_node_addname
 17.86%  [kernel]  [k] xfs_dir3_free_hdr_from_disk
  3.55%  [kernel]  [k] xfs_dir3_free_bests_p

We shouldn't be hitting the best free block scanning code so hard
when doing sequential directory creates, and it turns out there's
a highly suboptimal loop searching the the best free array in
the freespace block - it decodes the block header before checking
each entry inside a loop, instead of decoding the header once before
running the entry search loop.

This makes a massive difference to create rates. Profile now looks
like this:

  13.15%  [kernel]  [k] xfs_dir2_node_addname
   3.52%  [kernel]  [k] xfs_dir3_leaf_check_int
   3.11%  [kernel]  [k] xfs_log_commit_cil

And the wall time/average file create rate differences are
just as stark:

		create time(sec) / rate (files/s)
File count	     vanilla		    patched
  10k		   0.41 / 24.3k		   0.42 / 23.8k
  20k		   0.74	/ 27.0k		   0.76 / 26.3k
 100k		   3.81	/ 26.4k		   3.47 / 28.8k
 200k		   8.58	/ 23.3k		   7.19 / 27.8k
   1M		  85.69	/ 11.7k		  48.53 / 20.6k
   2M		 280.31	/  7.1k		 130.14 / 15.3k

The larger the directory, the bigger the performance improvement.

Signed-Off-By: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_dir2_node.c | 101 +++++++++++++---------------------
 1 file changed, 37 insertions(+), 64 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 1d3d1c9b5961..c6a1d1cc638f 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -1751,8 +1751,8 @@ xfs_dir2_node_find_freeblk(
 	xfs_dir2_db_t		dbno = -1;
 	xfs_dir2_db_t		fbno = -1;
 	xfs_fileoff_t		fo;
-	__be16			*bests;
-	int			findex;
+	__be16			*bests = NULL;
+	int			findex = 0;
 	int			error;
 
 	/*
@@ -1764,16 +1764,15 @@ xfs_dir2_node_find_freeblk(
 		fbp = fblk->bp;
 		free = fbp->b_addr;
 		findex = fblk->index;
+		bests = dp->d_ops->free_bests_p(free);
+		dp->d_ops->free_hdr_from_disk(&freehdr, free);
 		if (findex >= 0) {
 			/* caller already found the freespace for us. */
-			bests = dp->d_ops->free_bests_p(free);
-			dp->d_ops->free_hdr_from_disk(&freehdr, free);
-
 			ASSERT(findex < freehdr.nvalid);
 			ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF);
 			ASSERT(be16_to_cpu(bests[findex]) >= length);
 			dbno = freehdr.firstdb + findex;
-			goto out;
+			goto found_block;
 		}
 
 		/*
@@ -1783,8 +1782,6 @@ xfs_dir2_node_find_freeblk(
 		ifbno = fblk->blkno;
 		fbno = ifbno;
 	}
-	ASSERT(dbno == -1);
-	findex = 0;
 
 	/*
 	 * If we don't have a data block yet, we're going to scan the freespace
@@ -1802,69 +1799,45 @@ xfs_dir2_node_find_freeblk(
 
 	/*
 	 * While we haven't identified a data block, search the freeblock
-	 * data for a good data block.  If we find a null freeblock entry,
-	 * indicating a hole in the data blocks, remember that.
+	 * data for a data block with enough free space in it.
 	 */
-	while (dbno == -1) {
-		/*
-		 * If we don't have a freeblock in hand, get the next one.
-		 */
-		if (fbp == NULL) {
-			/*
-			 * If it's ifbno we already looked at it.
-			 */
-			if (++fbno == ifbno)
-				fbno++;
-			/*
-			 * If it's off the end we're done.
-			 */
-			if (fbno >= lastfbno)
-				break;
-			/*
-			 * Read the block.  There can be holes in the
-			 * freespace blocks, so this might not succeed.
-			 * This should be really rare, so there's no reason
-			 * to avoid it.
-			 */
-			error = xfs_dir2_free_try_read(tp, dp,
-					xfs_dir2_db_to_da(args->geo, fbno),
-					&fbp);
-			if (error)
-				return error;
-			if (!fbp)
-				continue;
-			free = fbp->b_addr;
-			findex = 0;
-		}
+	for ( ; fbno < lastfbno; fbno++) {
+		/* If it's ifbno we already looked at it. */
+		if (fbno == ifbno)
+			continue;
+
 		/*
-		 * Look at the current free entry.  Is it good enough?
-		 *
-		 * The bests initialisation should be where the bufer is read in
-		 * the above branch. But gcc is too stupid to realise that bests
-		 * and the freehdr are actually initialised if they are placed
-		 * there, so we have to do it here to avoid warnings. Blech.
+		 * Read the block.  There can be holes in the freespace blocks,
+		 * so this might not succeed.  This should be really rare, so
+		 * there's no reason to avoid it.
 		 */
+		error = xfs_dir2_free_try_read(tp, dp,
+				xfs_dir2_db_to_da(args->geo, fbno),
+				&fbp);
+		if (error)
+			return error;
+		if (!fbp)
+			continue;
+
+		findex = 0;
+		free = fbp->b_addr;
 		bests = dp->d_ops->free_bests_p(free);
 		dp->d_ops->free_hdr_from_disk(&freehdr, free);
-		if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
-		    be16_to_cpu(bests[findex]) >= length)
-			dbno = freehdr.firstdb + findex;
-		else {
-			/*
-			 * Are we done with the freeblock?
-			 */
-			if (++findex == freehdr.nvalid) {
-				/*
-				 * Drop the block.
-				 */
-				xfs_trans_brelse(tp, fbp);
-				fbp = NULL;
-				if (fblk && fblk->bp)
-					fblk->bp = NULL;
+
+		/* Scan the free entry array for a large enough free space. */
+		do {
+			if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
+			    be16_to_cpu(bests[findex]) >= length) {
+				dbno = freehdr.firstdb + findex;
+				goto found_block;
 			}
-		}
+		} while (++findex < freehdr.nvalid);
+
+		/* Didn't find free space, go on to next free block */
+		xfs_trans_brelse(tp, fbp);
 	}
-out:
+
+found_block:
 	*dbnop = dbno;
 	*fbpp = fbp;
 	*findexp = findex;
-- 
2.23.0.rc1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 5/5] xfs: reverse search directory freespace indexes
  2019-08-29  6:30 [PATCH V2 0/5] xfs: speed up large directory modifications Dave Chinner
                   ` (3 preceding siblings ...)
  2019-08-29  6:30 ` [PATCH 4/5] xfs: speed up directory bestfree block scanning Dave Chinner
@ 2019-08-29  6:30 ` Dave Chinner
  2019-08-29  8:23   ` Christoph Hellwig
  4 siblings, 1 reply; 25+ messages in thread
From: Dave Chinner @ 2019-08-29  6:30 UTC (permalink / raw)
  To: linux-xfs

From: Dave Chinner <dchinner@redhat.com>

When a directory is growing rapidly, new blocks tend to get added at
the end of the directory. These end up at the end of the freespace
index, and when the directory gets large finding these new
freespaces gets expensive. The code does a linear search across the
frespace index from the first block in the directory to the last,
hence meaning the newly added space is the last index searched.

Instead, do a reverse order index search, starting from the last
block and index in the freespace index. This makes most lookups for
free space on rapidly growing directories O(1) instead of O(N), but
should not have any impact on random insert workloads because the
average search length is the same regardless of which end of the
array we start at.

The result is a major improvement in large directory grow rates:

		create time(sec) / rate (files/s)
 File count     vanilla             Prev commit		Patched
  10k	      0.41 / 24.3k	   0.42 / 23.8k       0.41 / 24.3k
  20k	      0.74 / 27.0k	   0.76 / 26.3k       0.75 / 26.7k
 100k	      3.81 / 26.4k	   3.47 / 28.8k       3.27 / 30.6k
 200k	      8.58 / 23.3k	   7.19 / 27.8k       6.71 / 29.8k
   1M	     85.69 / 11.7k	  48.53 / 20.6k      37.67 / 26.5k
   2M	    280.31 /  7.1k	 130.14 / 15.3k      79.55 / 25.2k
  10M	   3913.26 /  2.5k                          552.89 / 18.1k

Signed-Off-By: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_dir2_node.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index c6a1d1cc638f..d8abbcbde055 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -1746,6 +1746,7 @@ xfs_dir2_node_find_freeblk(
 	struct xfs_inode	*dp = args->dp;
 	struct xfs_trans	*tp = args->trans;
 	struct xfs_buf		*fbp = NULL;
+	xfs_dir2_db_t		firstfbno;
 	xfs_dir2_db_t		lastfbno;
 	xfs_dir2_db_t		ifbno = -1;
 	xfs_dir2_db_t		dbno = -1;
@@ -1781,6 +1782,9 @@ xfs_dir2_node_find_freeblk(
 		 */
 		ifbno = fblk->blkno;
 		fbno = ifbno;
+		xfs_trans_brelse(tp, fbp);
+		fbp = NULL;
+		fblk->bp = NULL;
 	}
 
 	/*
@@ -1792,16 +1796,15 @@ xfs_dir2_node_find_freeblk(
 	if (error)
 		return error;
 	lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);
-
-	/* If we haven't get a search start block, set it now */
-	if (fbno == -1)
-		fbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);
+	firstfbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);
 
 	/*
 	 * While we haven't identified a data block, search the freeblock
-	 * data for a data block with enough free space in it.
+	 * data for a good data block. Do a reverse order search, as growing
+	 * directories will put new blocks with free space at the end of the
+	 * free space index.
 	 */
-	for ( ; fbno < lastfbno; fbno++) {
+	for (fbno = lastfbno - 1; fbno >= firstfbno; fbno--) {
 		/* If it's ifbno we already looked at it. */
 		if (fbno == ifbno)
 			continue;
@@ -1819,19 +1822,18 @@ xfs_dir2_node_find_freeblk(
 		if (!fbp)
 			continue;
 
-		findex = 0;
 		free = fbp->b_addr;
 		bests = dp->d_ops->free_bests_p(free);
 		dp->d_ops->free_hdr_from_disk(&freehdr, free);
 
 		/* Scan the free entry array for a large enough free space. */
-		do {
+		for (findex = freehdr.nvalid - 1; findex >= 0; findex--) {
 			if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
 			    be16_to_cpu(bests[findex]) >= length) {
 				dbno = freehdr.firstdb + findex;
 				goto found_block;
 			}
-		} while (++findex < freehdr.nvalid);
+		};
 
 		/* Didn't find free space, go on to next free block */
 		xfs_trans_brelse(tp, fbp);
-- 
2.23.0.rc1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/5] xfs: move xfs_dir2_addname()
  2019-08-29  6:30 ` [PATCH 1/5] xfs: move xfs_dir2_addname() Dave Chinner
@ 2019-08-29  7:59   ` Christoph Hellwig
  0 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2019-08-29  7:59 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 04:30:38PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> This gets rid of the need for a forward  declaration of the static
> function xfs_dir2_addname_int() and readies the code for factoring
> of xfs_dir2_addname_int().
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/5] xfs: factor data block addition from xfs_dir2_node_addname_int()
  2019-08-29  6:30 ` [PATCH 2/5] xfs: factor data block addition from xfs_dir2_node_addname_int() Dave Chinner
@ 2019-08-29  8:05   ` Christoph Hellwig
  2019-08-29  8:34     ` Dave Chinner
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2019-08-29  8:05 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 04:30:39PM +1000, Dave Chinner wrote:
> +			XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
> +					 XFS_ERRLEVEL_LOW, mp);

The function name here is incorret now.  I'd say just use __func__
to avoid that for the future.

Otherwise some of the code flow in the caller looks very ugly just with
this patch, but given that it is all sorted out by the end of the series
I don't really see an issue.

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/5] xfs: factor free block index lookup from xfs_dir2_node_addname_int()
  2019-08-29  6:30 ` [PATCH 3/5] xfs: factor free block index lookup " Dave Chinner
@ 2019-08-29  8:10   ` Christoph Hellwig
  2019-08-29  8:35     ` Dave Chinner
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2019-08-29  8:10 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 04:30:40PM +1000, Dave Chinner wrote:
> +	/*
> +	 * Now we know if we must allocate blocks, so if we are checking whether
> +	 * we can insert without allocation then we can return now.
> +	 */
> +	if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
> +		if (dbno != -1)
> +			return 0;
> +		return -ENOSPC;
> +	}

Nit: I'd invert the check to rturn -ENOSPC in the branch if dbno is
-1 to make the flow a littler easier.

Otherwise this looks great and makes the code much easier to read:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/5] xfs: speed up directory bestfree block scanning
  2019-08-29  6:30 ` [PATCH 4/5] xfs: speed up directory bestfree block scanning Dave Chinner
@ 2019-08-29  8:18   ` Christoph Hellwig
  2019-08-29  8:45     ` Dave Chinner
  2019-08-29  8:25   ` Christoph Hellwig
  1 sibling, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2019-08-29  8:18 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 04:30:41PM +1000, Dave Chinner wrote:
> +		bests = dp->d_ops->free_bests_p(free);
> +		dp->d_ops->free_hdr_from_disk(&freehdr, free);
>  		if (findex >= 0) {
>  			/* caller already found the freespace for us. */
> -			bests = dp->d_ops->free_bests_p(free);
> -			dp->d_ops->free_hdr_from_disk(&freehdr, free);
> -

I don't see any way how this is needed or helpful with this patch,
we are just going to ovewrite bests and freehdr before even looking
at them if the branch is not taken.

>  			ASSERT(findex < freehdr.nvalid);
>  			ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF);
>  			ASSERT(be16_to_cpu(bests[findex]) >= length);
>  			dbno = freehdr.firstdb + findex;
> -			goto out;
> +			goto found_block;

The label rename while more descriptive also seems entirely unrelated.

> +		findex = 0;
> +		free = fbp->b_addr;
>  		bests = dp->d_ops->free_bests_p(free);
>  		dp->d_ops->free_hdr_from_disk(&freehdr, free);
> +
> +		/* Scan the free entry array for a large enough free space. */
> +		do {
> +			if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
> +			    be16_to_cpu(bests[findex]) >= length) {
> +				dbno = freehdr.firstdb + findex;
> +				goto found_block;
>  			}
> +		} while (++findex < freehdr.nvalid);

Nit: wou;dn't this be better written as a for loop also taking the
initialization of findex into the loop?

Otherwise this looks good.  I always like it when a speedup removes
code..

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/5] xfs: reverse search directory freespace indexes
  2019-08-29  6:30 ` [PATCH 5/5] xfs: reverse search directory freespace indexes Dave Chinner
@ 2019-08-29  8:23   ` Christoph Hellwig
  2019-08-29  9:14     ` Dave Chinner
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2019-08-29  8:23 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 04:30:42PM +1000, Dave Chinner wrote:
> 		create time(sec) / rate (files/s)
>  File count     vanilla             Prev commit		Patched
>   10k	      0.41 / 24.3k	   0.42 / 23.8k       0.41 / 24.3k
>   20k	      0.74 / 27.0k	   0.76 / 26.3k       0.75 / 26.7k
>  100k	      3.81 / 26.4k	   3.47 / 28.8k       3.27 / 30.6k
>  200k	      8.58 / 23.3k	   7.19 / 27.8k       6.71 / 29.8k
>    1M	     85.69 / 11.7k	  48.53 / 20.6k      37.67 / 26.5k
>    2M	    280.31 /  7.1k	 130.14 / 15.3k      79.55 / 25.2k
>   10M	   3913.26 /  2.5k                          552.89 / 18.1k

Impressive!

> Signed-Off-By: Dave Chinner <dchinner@redhat.com>

FYI, the Off here should be all lower case.  Patch 2 actually has the
same issue, but I only noticed it here.

> @@ -1781,6 +1782,9 @@ xfs_dir2_node_find_freeblk(
>  		 */
>  		ifbno = fblk->blkno;
>  		fbno = ifbno;
> +		xfs_trans_brelse(tp, fbp);
> +		fbp = NULL;
> +		fblk->bp = NULL;

Hmm.  Doesn't this actually belong into the previous patch?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/5] xfs: speed up directory bestfree block scanning
  2019-08-29  6:30 ` [PATCH 4/5] xfs: speed up directory bestfree block scanning Dave Chinner
  2019-08-29  8:18   ` Christoph Hellwig
@ 2019-08-29  8:25   ` Christoph Hellwig
  2019-08-29  9:31     ` Dave Chinner
  1 sibling, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2019-08-29  8:25 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

Actually, another comment:

> +		/* Scan the free entry array for a large enough free space. */
> +		do {
> +			if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&

This could be changed to:

			if (bests[findex] != cpu_to_be16(NULLDATAOFF) &&

which might lead to slightly better code generation.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/5] xfs: factor data block addition from xfs_dir2_node_addname_int()
  2019-08-29  8:05   ` Christoph Hellwig
@ 2019-08-29  8:34     ` Dave Chinner
  0 siblings, 0 replies; 25+ messages in thread
From: Dave Chinner @ 2019-08-29  8:34 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 01:05:29AM -0700, Christoph Hellwig wrote:
> On Thu, Aug 29, 2019 at 04:30:39PM +1000, Dave Chinner wrote:
> > +			XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
> > +					 XFS_ERRLEVEL_LOW, mp);
> 
> The function name here is incorret now.  I'd say just use __func__
> to avoid that for the future.

Fixed.

> Otherwise some of the code flow in the caller looks very ugly just with
> this patch, but given that it is all sorted out by the end of the series
> I don't really see an issue.
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>

Thanks.

-Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/5] xfs: factor free block index lookup from xfs_dir2_node_addname_int()
  2019-08-29  8:10   ` Christoph Hellwig
@ 2019-08-29  8:35     ` Dave Chinner
  0 siblings, 0 replies; 25+ messages in thread
From: Dave Chinner @ 2019-08-29  8:35 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 01:10:13AM -0700, Christoph Hellwig wrote:
> On Thu, Aug 29, 2019 at 04:30:40PM +1000, Dave Chinner wrote:
> > +	/*
> > +	 * Now we know if we must allocate blocks, so if we are checking whether
> > +	 * we can insert without allocation then we can return now.
> > +	 */
> > +	if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
> > +		if (dbno != -1)
> > +			return 0;
> > +		return -ENOSPC;
> > +	}
> 
> Nit: I'd invert the check to rturn -ENOSPC in the branch if dbno is
> -1 to make the flow a littler easier.

Fixed.

-Dave.

-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/5] xfs: speed up directory bestfree block scanning
  2019-08-29  8:18   ` Christoph Hellwig
@ 2019-08-29  8:45     ` Dave Chinner
  2019-08-29  8:47       ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Dave Chinner @ 2019-08-29  8:45 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 01:18:22AM -0700, Christoph Hellwig wrote:
> On Thu, Aug 29, 2019 at 04:30:41PM +1000, Dave Chinner wrote:
> > +		bests = dp->d_ops->free_bests_p(free);
> > +		dp->d_ops->free_hdr_from_disk(&freehdr, free);
> >  		if (findex >= 0) {
> >  			/* caller already found the freespace for us. */
> > -			bests = dp->d_ops->free_bests_p(free);
> > -			dp->d_ops->free_hdr_from_disk(&freehdr, free);
> > -
> 
> I don't see any way how this is needed or helpful with this patch,
> we are just going to ovewrite bests and freehdr before even looking
> at them if the branch is not taken.

*nod*

The change is not useful anymore as a result of folding in your
previous suggestions. I'll revert it.

> >  			ASSERT(findex < freehdr.nvalid);
> >  			ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF);
> >  			ASSERT(be16_to_cpu(bests[findex]) >= length);
> >  			dbno = freehdr.firstdb + findex;
> > -			goto out;
> > +			goto found_block;
> 
> The label rename while more descriptive also seems entirely unrelated.

That was one of your previous suggestions :)

I'll push it back up one patch into the cleanup patch and leave this
as an optimisation only patch.

> > +		findex = 0;
> > +		free = fbp->b_addr;
> >  		bests = dp->d_ops->free_bests_p(free);
> >  		dp->d_ops->free_hdr_from_disk(&freehdr, free);
> > +
> > +		/* Scan the free entry array for a large enough free space. */
> > +		do {
> > +			if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
> > +			    be16_to_cpu(bests[findex]) >= length) {
> > +				dbno = freehdr.firstdb + findex;
> > +				goto found_block;
> >  			}
> > +		} while (++findex < freehdr.nvalid);
> 
> Nit: wou;dn't this be better written as a for loop also taking the
> initialization of findex into the loop?

Agreed - the next patch does that with the reversal of the search
order. The end result is what you're asking for, so I'll leave this
alone for now....

> Otherwise this looks good.  I always like it when a speedup removes
> code..

I hadn't noticed that - I was more concerned with ending up with
readable code :)

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/5] xfs: speed up directory bestfree block scanning
  2019-08-29  8:45     ` Dave Chinner
@ 2019-08-29  8:47       ` Christoph Hellwig
  2019-08-29  8:55         ` Dave Chinner
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2019-08-29  8:47 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, linux-xfs

On Thu, Aug 29, 2019 at 06:45:30PM +1000, Dave Chinner wrote:
> > 
> > The label rename while more descriptive also seems entirely unrelated.
> 
> That was one of your previous suggestions :)
> 
> I'll push it back up one patch into the cleanup patch and leave this
> as an optimisation only patch.

Oh well.  Just keep it then :)

> > > +		/* Scan the free entry array for a large enough free space. */
> > > +		do {
> > > +			if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
> > > +			    be16_to_cpu(bests[findex]) >= length) {
> > > +				dbno = freehdr.firstdb + findex;
> > > +				goto found_block;
> > >  			}
> > > +		} while (++findex < freehdr.nvalid);
> > 
> > Nit: wou;dn't this be better written as a for loop also taking the
> > initialization of findex into the loop?
> 
> Agreed - the next patch does that with the reversal of the search
> order. The end result is what you're asking for, so I'll leave this
> alone for now....

If you touch this patch anyway please just switch to the for loop
here, that keeps the churn down in the next one.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/5] xfs: speed up directory bestfree block scanning
  2019-08-29  8:47       ` Christoph Hellwig
@ 2019-08-29  8:55         ` Dave Chinner
  0 siblings, 0 replies; 25+ messages in thread
From: Dave Chinner @ 2019-08-29  8:55 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 01:47:09AM -0700, Christoph Hellwig wrote:
> On Thu, Aug 29, 2019 at 06:45:30PM +1000, Dave Chinner wrote:
> > > 
> > > The label rename while more descriptive also seems entirely unrelated.
> > 
> > That was one of your previous suggestions :)
> > 
> > I'll push it back up one patch into the cleanup patch and leave this
> > as an optimisation only patch.
> 
> Oh well.  Just keep it then :)

It was introduced in the previous patch, anyway :P

> > > > +		/* Scan the free entry array for a large enough free space. */
> > > > +		do {
> > > > +			if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
> > > > +			    be16_to_cpu(bests[findex]) >= length) {
> > > > +				dbno = freehdr.firstdb + findex;
> > > > +				goto found_block;
> > > >  			}
> > > > +		} while (++findex < freehdr.nvalid);
> > > 
> > > Nit: wou;dn't this be better written as a for loop also taking the
> > > initialization of findex into the loop?
> > 
> > Agreed - the next patch does that with the reversal of the search
> > order. The end result is what you're asking for, so I'll leave this
> > alone for now....
> 
> If you touch this patch anyway please just switch to the for loop
> here, that keeps the churn down in the next one.

OK.

-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/5] xfs: reverse search directory freespace indexes
  2019-08-29  8:23   ` Christoph Hellwig
@ 2019-08-29  9:14     ` Dave Chinner
  0 siblings, 0 replies; 25+ messages in thread
From: Dave Chinner @ 2019-08-29  9:14 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 01:23:10AM -0700, Christoph Hellwig wrote:
> On Thu, Aug 29, 2019 at 04:30:42PM +1000, Dave Chinner wrote:
> > 		create time(sec) / rate (files/s)
> >  File count     vanilla             Prev commit		Patched
> >   10k	      0.41 / 24.3k	   0.42 / 23.8k       0.41 / 24.3k
> >   20k	      0.74 / 27.0k	   0.76 / 26.3k       0.75 / 26.7k
> >  100k	      3.81 / 26.4k	   3.47 / 28.8k       3.27 / 30.6k
> >  200k	      8.58 / 23.3k	   7.19 / 27.8k       6.71 / 29.8k
> >    1M	     85.69 / 11.7k	  48.53 / 20.6k      37.67 / 26.5k
> >    2M	    280.31 /  7.1k	 130.14 / 15.3k      79.55 / 25.2k
> >   10M	   3913.26 /  2.5k                          552.89 / 18.1k
> 
> Impressive!

Yeah, i think the  drop-off in performance at 10M inodes is caused
by running out of RAM at about 6M inodes, and so there's extra CPU
time spent in direct memory reclaim after than so the single
threaded create performance is lower from as a result.

> > Signed-Off-By: Dave Chinner <dchinner@redhat.com>
> 
> FYI, the Off here should be all lower case.  Patch 2 actually has the
> same issue, but I only noticed it here.

Yeah, 3 of 5 patches are like that. No idea how - I use macros for
all these sobs and rvbs and they all use lower case....

> > @@ -1781,6 +1782,9 @@ xfs_dir2_node_find_freeblk(
> >  		 */
> >  		ifbno = fblk->blkno;
> >  		fbno = ifbno;
> > +		xfs_trans_brelse(tp, fbp);
> > +		fbp = NULL;
> > +		fblk->bp = NULL;
> 
> Hmm.  Doesn't this actually belong into the previous patch?

Yup, I'll move them.

-Dave.

-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/5] xfs: speed up directory bestfree block scanning
  2019-08-29  8:25   ` Christoph Hellwig
@ 2019-08-29  9:31     ` Dave Chinner
  2019-08-29  9:33       ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Dave Chinner @ 2019-08-29  9:31 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 01:25:01AM -0700, Christoph Hellwig wrote:
> Actually, another comment:
> 
> > +		/* Scan the free entry array for a large enough free space. */
> > +		do {
> > +			if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
> 
> This could be changed to:
> 
> 			if (bests[findex] != cpu_to_be16(NULLDATAOFF) &&
> 
> which might lead to slightly better code generation.

I don't think it will make any difference because the very next
comparison in the if() statement needs the cpu order bests[findex]
value because its a >= check. Hence we have to calculate it anyway,
and the compiler should be smart enough to only evaluate it once...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/5] xfs: speed up directory bestfree block scanning
  2019-08-29  9:31     ` Dave Chinner
@ 2019-08-29  9:33       ` Christoph Hellwig
  0 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2019-08-29  9:33 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, linux-xfs

On Thu, Aug 29, 2019 at 07:31:17PM +1000, Dave Chinner wrote:
> On Thu, Aug 29, 2019 at 01:25:01AM -0700, Christoph Hellwig wrote:
> > Actually, another comment:
> > 
> > > +		/* Scan the free entry array for a large enough free space. */
> > > +		do {
> > > +			if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
> > 
> > This could be changed to:
> > 
> > 			if (bests[findex] != cpu_to_be16(NULLDATAOFF) &&
> > 
> > which might lead to slightly better code generation.
> 
> I don't think it will make any difference because the very next
> comparison in the if() statement needs the cpu order bests[findex]
> value because its a >= check. Hence we have to calculate it anyway,
> and the compiler should be smart enough to only evaluate it once...

Yeah, makes sense.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/5] xfs: reverse search directory freespace indexes
  2019-08-29 10:47 ` [PATCH 5/5] xfs: reverse search directory freespace indexes Dave Chinner
  2019-08-29 21:20   ` Darrick J. Wong
@ 2019-08-30  5:24   ` Christoph Hellwig
  1 sibling, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2019-08-30  5:24 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 08:47:10PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> When a directory is growing rapidly, new blocks tend to get added at
> the end of the directory. These end up at the end of the freespace
> index, and when the directory gets large finding these new
> freespaces gets expensive. The code does a linear search across the
> frespace index from the first block in the directory to the last,
> hence meaning the newly added space is the last index searched.
> 
> Instead, do a reverse order index search, starting from the last
> block and index in the freespace index. This makes most lookups for
> free space on rapidly growing directories O(1) instead of O(N), but
> should not have any impact on random insert workloads because the
> average search length is the same regardless of which end of the
> array we start at.
> 
> The result is a major improvement in large directory grow rates:
> 
> 		create time(sec) / rate (files/s)
>  File count     vanilla             Prev commit		Patched
>   10k	      0.41 / 24.3k	   0.42 / 23.8k       0.41 / 24.3k
>   20k	      0.74 / 27.0k	   0.76 / 26.3k       0.75 / 26.7k
>  100k	      3.81 / 26.4k	   3.47 / 28.8k       3.27 / 30.6k
>  200k	      8.58 / 23.3k	   7.19 / 27.8k       6.71 / 29.8k
>    1M	     85.69 / 11.7k	  48.53 / 20.6k      37.67 / 26.5k
>    2M	    280.31 /  7.1k	 130.14 / 15.3k      79.55 / 25.2k
>   10M	   3913.26 /  2.5k                          552.89 / 18.1k
> 
> Signed-Off-By: Dave Chinner <dchinner@redhat.com>

Same here.

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/5] xfs: reverse search directory freespace indexes
  2019-08-29 10:47 ` [PATCH 5/5] xfs: reverse search directory freespace indexes Dave Chinner
@ 2019-08-29 21:20   ` Darrick J. Wong
  2019-08-30  5:24   ` Christoph Hellwig
  1 sibling, 0 replies; 25+ messages in thread
From: Darrick J. Wong @ 2019-08-29 21:20 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

On Thu, Aug 29, 2019 at 08:47:10PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> When a directory is growing rapidly, new blocks tend to get added at
> the end of the directory. These end up at the end of the freespace
> index, and when the directory gets large finding these new
> freespaces gets expensive. The code does a linear search across the
> frespace index from the first block in the directory to the last,
> hence meaning the newly added space is the last index searched.
> 
> Instead, do a reverse order index search, starting from the last
> block and index in the freespace index. This makes most lookups for
> free space on rapidly growing directories O(1) instead of O(N), but
> should not have any impact on random insert workloads because the
> average search length is the same regardless of which end of the
> array we start at.
> 
> The result is a major improvement in large directory grow rates:
> 
> 		create time(sec) / rate (files/s)
>  File count     vanilla             Prev commit		Patched
>   10k	      0.41 / 24.3k	   0.42 / 23.8k       0.41 / 24.3k
>   20k	      0.74 / 27.0k	   0.76 / 26.3k       0.75 / 26.7k
>  100k	      3.81 / 26.4k	   3.47 / 28.8k       3.27 / 30.6k
>  200k	      8.58 / 23.3k	   7.19 / 27.8k       6.71 / 29.8k
>    1M	     85.69 / 11.7k	  48.53 / 20.6k      37.67 / 26.5k
>    2M	    280.31 /  7.1k	 130.14 / 15.3k      79.55 / 25.2k
>   10M	   3913.26 /  2.5k                          552.89 / 18.1k
> 
> Signed-Off-By: Dave Chinner <dchinner@redhat.com>

Heh, neat.

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/libxfs/xfs_dir2_node.c | 13 +++++--------
>  1 file changed, 5 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
> index a81f56d9e538..705c4f562758 100644
> --- a/fs/xfs/libxfs/xfs_dir2_node.c
> +++ b/fs/xfs/libxfs/xfs_dir2_node.c
> @@ -1745,10 +1745,11 @@ xfs_dir2_node_find_freeblk(
>  	struct xfs_inode	*dp = args->dp;
>  	struct xfs_trans	*tp = args->trans;
>  	struct xfs_buf		*fbp = NULL;
> +	xfs_dir2_db_t		firstfbno;
>  	xfs_dir2_db_t		lastfbno;
>  	xfs_dir2_db_t		ifbno = -1;
>  	xfs_dir2_db_t		dbno = -1;
> -	xfs_dir2_db_t		fbno = -1;
> +	xfs_dir2_db_t		fbno;
>  	xfs_fileoff_t		fo;
>  	__be16			*bests = NULL;
>  	int			findex = 0;
> @@ -1780,7 +1781,6 @@ xfs_dir2_node_find_freeblk(
>  		 * We'll start at the beginning of the freespace entries.
>  		 */
>  		ifbno = fblk->blkno;
> -		fbno = ifbno;
>  		xfs_trans_brelse(tp, fbp);
>  		fbp = NULL;
>  		fblk->bp = NULL;
> @@ -1794,12 +1794,9 @@ xfs_dir2_node_find_freeblk(
>  	if (error)
>  		return error;
>  	lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);
> +	firstfbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);
>  
> -	/* If we haven't get a search start block, set it now */
> -	if (fbno == -1)
> -		fbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);
> -
> -	for ( ; fbno < lastfbno; fbno++) {
> +	for (fbno = lastfbno - 1; fbno >= firstfbno; fbno--) {
>  		/* If it's ifbno we already looked at it. */
>  		if (fbno == ifbno)
>  			continue;
> @@ -1822,7 +1819,7 @@ xfs_dir2_node_find_freeblk(
>  		dp->d_ops->free_hdr_from_disk(&freehdr, free);
>  
>  		/* Scan the free entry array for a large enough free space. */
> -		for (findex = 0; findex < freehdr.nvalid; findex++) {
> +		for (findex = freehdr.nvalid - 1; findex >= 0; findex--) {
>  			if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
>  			    be16_to_cpu(bests[findex]) >= length) {
>  				dbno = freehdr.firstdb + findex;
> -- 
> 2.23.0.rc1
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 5/5] xfs: reverse search directory freespace indexes
  2019-08-29 10:47 [PATCH v3 0/5] xfs: speed up large directory modifications Dave Chinner
@ 2019-08-29 10:47 ` Dave Chinner
  2019-08-29 21:20   ` Darrick J. Wong
  2019-08-30  5:24   ` Christoph Hellwig
  0 siblings, 2 replies; 25+ messages in thread
From: Dave Chinner @ 2019-08-29 10:47 UTC (permalink / raw)
  To: linux-xfs

From: Dave Chinner <dchinner@redhat.com>

When a directory is growing rapidly, new blocks tend to get added at
the end of the directory. These end up at the end of the freespace
index, and when the directory gets large finding these new
freespaces gets expensive. The code does a linear search across the
frespace index from the first block in the directory to the last,
hence meaning the newly added space is the last index searched.

Instead, do a reverse order index search, starting from the last
block and index in the freespace index. This makes most lookups for
free space on rapidly growing directories O(1) instead of O(N), but
should not have any impact on random insert workloads because the
average search length is the same regardless of which end of the
array we start at.

The result is a major improvement in large directory grow rates:

		create time(sec) / rate (files/s)
 File count     vanilla             Prev commit		Patched
  10k	      0.41 / 24.3k	   0.42 / 23.8k       0.41 / 24.3k
  20k	      0.74 / 27.0k	   0.76 / 26.3k       0.75 / 26.7k
 100k	      3.81 / 26.4k	   3.47 / 28.8k       3.27 / 30.6k
 200k	      8.58 / 23.3k	   7.19 / 27.8k       6.71 / 29.8k
   1M	     85.69 / 11.7k	  48.53 / 20.6k      37.67 / 26.5k
   2M	    280.31 /  7.1k	 130.14 / 15.3k      79.55 / 25.2k
  10M	   3913.26 /  2.5k                          552.89 / 18.1k

Signed-Off-By: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_dir2_node.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index a81f56d9e538..705c4f562758 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -1745,10 +1745,11 @@ xfs_dir2_node_find_freeblk(
 	struct xfs_inode	*dp = args->dp;
 	struct xfs_trans	*tp = args->trans;
 	struct xfs_buf		*fbp = NULL;
+	xfs_dir2_db_t		firstfbno;
 	xfs_dir2_db_t		lastfbno;
 	xfs_dir2_db_t		ifbno = -1;
 	xfs_dir2_db_t		dbno = -1;
-	xfs_dir2_db_t		fbno = -1;
+	xfs_dir2_db_t		fbno;
 	xfs_fileoff_t		fo;
 	__be16			*bests = NULL;
 	int			findex = 0;
@@ -1780,7 +1781,6 @@ xfs_dir2_node_find_freeblk(
 		 * We'll start at the beginning of the freespace entries.
 		 */
 		ifbno = fblk->blkno;
-		fbno = ifbno;
 		xfs_trans_brelse(tp, fbp);
 		fbp = NULL;
 		fblk->bp = NULL;
@@ -1794,12 +1794,9 @@ xfs_dir2_node_find_freeblk(
 	if (error)
 		return error;
 	lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);
+	firstfbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);
 
-	/* If we haven't get a search start block, set it now */
-	if (fbno == -1)
-		fbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);
-
-	for ( ; fbno < lastfbno; fbno++) {
+	for (fbno = lastfbno - 1; fbno >= firstfbno; fbno--) {
 		/* If it's ifbno we already looked at it. */
 		if (fbno == ifbno)
 			continue;
@@ -1822,7 +1819,7 @@ xfs_dir2_node_find_freeblk(
 		dp->d_ops->free_hdr_from_disk(&freehdr, free);
 
 		/* Scan the free entry array for a large enough free space. */
-		for (findex = 0; findex < freehdr.nvalid; findex++) {
+		for (findex = freehdr.nvalid - 1; findex >= 0; findex--) {
 			if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
 			    be16_to_cpu(bests[findex]) >= length) {
 				dbno = freehdr.firstdb + findex;
-- 
2.23.0.rc1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/5] xfs: reverse search directory freespace indexes
  2018-10-24 22:57 ` [PATCH 5/5] xfs: reverse search directory freespace indexes Dave Chinner
@ 2018-10-26 12:14   ` Christoph Hellwig
  0 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2018-10-26 12:14 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

This:

>  	/* If we haven't get a search start block, set it now */
>  	if (fbno == -1)
		fbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);

Can be removed now, as it will be overwritten again a few lines down.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 5/5] xfs: reverse search directory freespace indexes
  2018-10-24 22:57 [PATCH 0/5] xfs: speed up large directory modifications Dave Chinner
@ 2018-10-24 22:57 ` Dave Chinner
  2018-10-26 12:14   ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Dave Chinner @ 2018-10-24 22:57 UTC (permalink / raw)
  To: linux-xfs

From: Dave Chinner <dchinner@redhat.com>

When a directory is growing rapidly, new blocks tend to get added at
the end of the directory. These end up at the end of the freespace
index, and when the directory gets large finding these new
freespaces gets expensive. The code does a linear search across the
frespace index from the first block in the directory to the last,
hence meaning the newly added space is the last index searched.

Instead, do a reverse order index search, starting from the last
block and index in the freespace index. This makes most lookups for
free space on rapidly growing directories O(1) instead of O(N), but
should not have any impact on random insert workloads because the
average search length is the same regardless of which end of the
array we start at.

The result is a major improvement in large directory grow rates:

                create time(sec) / rate (files/s)
 File count     vanilla             patched
   10k        0.54 / 18.5k         0.52 / 19.3k
   20k        1.10 / 18.1k         1.00 / 20.0k
  100k        4.21 / 23.8k         3.58 / 27.9k
  200k        9.66 / 20,7k         7.08 / 28.3k
    1M       86.61 / 11.5k        38.33 / 26.1k
    2M      206.13 /  9.7k        82.20 / 24.3k
   10M     2843.57 /  3.5k       591.78 / 16.9k

Signed-Off-By: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_dir2_node.c | 69 +++++++++++++++++------------------
 1 file changed, 34 insertions(+), 35 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 621f63de075c..aa52dc740305 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -1747,7 +1747,8 @@ xfs_dir2_node_find_freeblk(
 	struct xfs_inode	*dp = args->dp;
 	struct xfs_trans	*tp = args->trans;
 	struct xfs_buf		*fbp = NULL;
-	int			findex;
+	int			findex = 0;
+	xfs_dir2_db_t		firstfbno;
 	xfs_dir2_db_t		lastfbno;
 	xfs_dir2_db_t		ifbno = -1;
 	xfs_dir2_db_t		dbno = -1;
@@ -1775,7 +1776,7 @@ xfs_dir2_node_find_freeblk(
 			ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF);
 			ASSERT(be16_to_cpu(bests[findex]) >= length);
 			dbno = freehdr.firstdb + findex;
-			goto out;
+			goto found_block;
 		}
 
 		/*
@@ -1784,9 +1785,10 @@ xfs_dir2_node_find_freeblk(
 		 */
 		ifbno = fblk->blkno;
 		fbno = ifbno;
+		xfs_trans_brelse(tp, fbp);
+		fbp = NULL;
+		fblk->bp = NULL;
 	}
-	ASSERT(dbno == -1);
-	findex = 0;
 
 	/*
 	 * If we don't have a data block yet, we're going to scan the freespace
@@ -1797,6 +1799,7 @@ xfs_dir2_node_find_freeblk(
 	if (error)
 		return error;
 	lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);
+	firstfbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);
 
 	/* If we haven't get a search start block, set it now */
 	if (fbno == -1)
@@ -1804,51 +1807,47 @@ xfs_dir2_node_find_freeblk(
 
 	/*
 	 * While we haven't identified a data block, search the freeblock
-	 * data for a data block with enough free space in it.
+	 * data for a good data block. Do a reverse order search, as growing
+	 * directories will put new blocks with free space at the end of the
+	 * free space index.
 	 */
-	for ( ; fbno < lastfbno; fbno++) {
-		/* If we don't have a freeblock in hand, get the next one. */
-		if (fbp == NULL) {
-			/* If it's ifbno we already looked at it. */
-			if (fbno == ifbno)
-				continue;
+	for (fbno = lastfbno - 1; fbno >= firstfbno; fbno--) {
+		/* If it's ifbno we already looked at it. */
+		if (fbno == ifbno)
+			continue;
 
-			/*
-			 * Read the block.  There can be holes in the freespace
-			 * blocks, so this might not succeed.  This should be
-			 * really rare, so there's no reason to avoid it.
-			 */
-			error = xfs_dir2_free_try_read(tp, dp,
-					xfs_dir2_db_to_da(args->geo, fbno),
-					&fbp);
-			if (error)
-				return error;
-			if (!fbp)
-				continue;
+		/*
+		 * Read the block.  There can be holes in the freespace
+		 * blocks, so this might not succeed.  This should be
+		 * really rare, so there's no reason to avoid it.
+		 */
+		error = xfs_dir2_free_try_read(tp, dp,
+				xfs_dir2_db_to_da(args->geo, fbno),
+				&fbp);
+		if (error)
+			return error;
+		if (!fbp)
+			continue;
 
-			findex = 0;
-			free = fbp->b_addr;
-			bests = dp->d_ops->free_bests_p(free);
-			dp->d_ops->free_hdr_from_disk(&freehdr, free);
-		}
+		findex = 0;
+		free = fbp->b_addr;
+		bests = dp->d_ops->free_bests_p(free);
+		dp->d_ops->free_hdr_from_disk(&freehdr, free);
 
 		/* Scan the free entry array for a large enough free space. */
-		do {
+		for (findex = freehdr.nvalid - 1; findex >= 0; findex--) {
 			if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
 			    be16_to_cpu(bests[findex]) >= length) {
 				dbno = freehdr.firstdb + findex;
-				goto out;
+				goto found_block;
 			}
-		} while (++findex < freehdr.nvalid);
+		}
 
 		/* Didn't find free space, go on to next free block */
 		xfs_trans_brelse(tp, fbp);
-		fbp = NULL;
-		if (fblk)
-			fblk->bp = NULL;
 	}
 
-out:
+found_block:
 	*dbnop = dbno;
 	*fbpp = fbp;
 	*findexp = findex;
-- 
2.19.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2019-08-30  5:24 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-29  6:30 [PATCH V2 0/5] xfs: speed up large directory modifications Dave Chinner
2019-08-29  6:30 ` [PATCH 1/5] xfs: move xfs_dir2_addname() Dave Chinner
2019-08-29  7:59   ` Christoph Hellwig
2019-08-29  6:30 ` [PATCH 2/5] xfs: factor data block addition from xfs_dir2_node_addname_int() Dave Chinner
2019-08-29  8:05   ` Christoph Hellwig
2019-08-29  8:34     ` Dave Chinner
2019-08-29  6:30 ` [PATCH 3/5] xfs: factor free block index lookup " Dave Chinner
2019-08-29  8:10   ` Christoph Hellwig
2019-08-29  8:35     ` Dave Chinner
2019-08-29  6:30 ` [PATCH 4/5] xfs: speed up directory bestfree block scanning Dave Chinner
2019-08-29  8:18   ` Christoph Hellwig
2019-08-29  8:45     ` Dave Chinner
2019-08-29  8:47       ` Christoph Hellwig
2019-08-29  8:55         ` Dave Chinner
2019-08-29  8:25   ` Christoph Hellwig
2019-08-29  9:31     ` Dave Chinner
2019-08-29  9:33       ` Christoph Hellwig
2019-08-29  6:30 ` [PATCH 5/5] xfs: reverse search directory freespace indexes Dave Chinner
2019-08-29  8:23   ` Christoph Hellwig
2019-08-29  9:14     ` Dave Chinner
  -- strict thread matches above, loose matches on Subject: below --
2019-08-29 10:47 [PATCH v3 0/5] xfs: speed up large directory modifications Dave Chinner
2019-08-29 10:47 ` [PATCH 5/5] xfs: reverse search directory freespace indexes Dave Chinner
2019-08-29 21:20   ` Darrick J. Wong
2019-08-30  5:24   ` Christoph Hellwig
2018-10-24 22:57 [PATCH 0/5] xfs: speed up large directory modifications Dave Chinner
2018-10-24 22:57 ` [PATCH 5/5] xfs: reverse search directory freespace indexes Dave Chinner
2018-10-26 12:14   ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).