All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brian Foster <bfoster@redhat.com>
To: linux-xfs@vger.kernel.org
Subject: [PATCH RFC 4/4] xfs: skip busy inodes on finobt inode allocation
Date: Thu, 17 Feb 2022 12:25:18 -0500	[thread overview]
Message-ID: <20220217172518.3842951-5-bfoster@redhat.com> (raw)
In-Reply-To: <20220217172518.3842951-1-bfoster@redhat.com>

Experimental algorithm to skip busy inodes on finobt based inode
allocation. This is a first pass implementation intended primarily
for functional and performance experimentation. The logic is
intentionally kept simple to minimize the scope of changes required
to demonstrate functionality.

The existing finobt inode allocation algorithms are updated to
filter out records that are covered by at least one still-busy
inode[1] and continue scanning as appropriate based on the
allocation mode. For example, near allocation mode continues
scanning left and right until a usable record is found. newino mode
checks the target record and then scans from the first record in the
tree until a usable record is found. If the associated algorithm
cannot find a usable record, it falls back to new chunk allocation
to add non-busy inodes to the selection pool and restarts.

[1] As I write this, it occurs to me this logic could be further
improved to compare the first busy inode against the first free
inode in the record without disrupting the subsequent inode
selection logic.

Not-Signed-off-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_ialloc.c | 81 +++++++++++++++++++++++++++++++++++---
 1 file changed, 76 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 3eb41228e210..c79c85327cf4 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1248,12 +1248,53 @@ xfs_dialloc_ag_inobt(
 	return error;
 }
 
+#define XFS_LOOKUP_BATCH	XFS_INODES_PER_CHUNK
+#define XFS_ICI_BUSY_TAG	2
+STATIC bool
+xfs_dialloc_ag_inobt_rec_busy(
+	struct xfs_perag		*pag,
+	struct xfs_inobt_rec_incore	*rec)
+{
+	struct xfs_inode		*batch[XFS_LOOKUP_BATCH];
+	struct xfs_inode		*ip;
+	int				found, i;
+	xfs_agino_t			startino = rec->ir_startino;
+	bool				busy = false;
+	unsigned long			destroy_gp;
+
+	rcu_read_lock();
+	found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, (void **) batch,
+					   startino, XFS_LOOKUP_BATCH,
+					   XFS_ICI_BUSY_TAG);
+	for (i = 0; i < found; i++) {
+		ip = batch[i];
+		spin_lock(&ip->i_flags_lock);
+		if (ip->i_ino >= startino + XFS_INODES_PER_CHUNK) {
+			spin_unlock(&ip->i_flags_lock);
+			break;
+		}
+		destroy_gp = ip->i_destroy_gp;
+		spin_unlock(&ip->i_flags_lock);
+
+		if (!poll_state_synchronize_rcu(destroy_gp)) {
+			busy = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	trace_printk("%d: agno %d startino 0x%x found %d busy %d caller %pS\n",
+		     __LINE__, pag->pag_agno, startino, found, busy,
+		     (void *) _RET_IP_);
+	return busy;
+}
+
 /*
  * Use the free inode btree to allocate an inode based on distance from the
  * parent. Note that the provided cursor may be deleted and replaced.
  */
 STATIC int
 xfs_dialloc_ag_finobt_near(
+	struct xfs_perag		*pag,
 	xfs_agino_t			pagino,
 	struct xfs_btree_cur		**ocur,
 	struct xfs_inobt_rec_incore	*rec)
@@ -1281,8 +1322,10 @@ xfs_dialloc_ag_finobt_near(
 		 * existence is enough.
 		 */
 		if (pagino >= rec->ir_startino &&
-		    pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
-			return 0;
+		    pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK)) {
+			if (!xfs_dialloc_ag_inobt_rec_busy(pag, rec))
+				return 0;
+		}
 	}
 
 	error = xfs_btree_dup_cursor(lcur, &rcur);
@@ -1306,6 +1349,21 @@ xfs_dialloc_ag_finobt_near(
 		error = -EFSCORRUPTED;
 		goto error_rcur;
 	}
+
+	while (i == 1 && xfs_dialloc_ag_inobt_rec_busy(pag, rec)) {
+		error = xfs_ialloc_next_rec(lcur, rec, &i, 1);
+		if (error)
+			goto error_rcur;
+		i = !i;
+	}
+
+	while (j == 1 && xfs_dialloc_ag_inobt_rec_busy(pag, &rrec)) {
+		error = xfs_ialloc_next_rec(rcur, &rrec, &j, 0);
+		if (error)
+			goto error_rcur;
+		j = !j;
+	}
+
 	if (i == 1 && j == 1) {
 		/*
 		 * Both the left and right records are valid. Choose the closer
@@ -1327,6 +1385,9 @@ xfs_dialloc_ag_finobt_near(
 	} else if (i == 1) {
 		/* only the left record is valid */
 		xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
+	} else {
+		error = -EAGAIN;
+		goto error_rcur;
 	}
 
 	return 0;
@@ -1342,6 +1403,7 @@ xfs_dialloc_ag_finobt_near(
  */
 STATIC int
 xfs_dialloc_ag_finobt_newino(
+	struct xfs_perag		*pag,
 	struct xfs_agi			*agi,
 	struct xfs_btree_cur		*cur,
 	struct xfs_inobt_rec_incore	*rec)
@@ -1360,7 +1422,8 @@ xfs_dialloc_ag_finobt_newino(
 				return error;
 			if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
 				return -EFSCORRUPTED;
-			return 0;
+			if (!xfs_dialloc_ag_inobt_rec_busy(pag, rec))
+				return 0;
 		}
 	}
 
@@ -1379,6 +1442,14 @@ xfs_dialloc_ag_finobt_newino(
 	if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
 		return -EFSCORRUPTED;
 
+	while (xfs_dialloc_ag_inobt_rec_busy(pag, rec)) {
+		error = xfs_ialloc_next_rec(cur, rec, &i, 0);
+		if (error)
+			return error;
+		if (i == 1)
+			return -EAGAIN;
+	}
+
 	return 0;
 }
 
@@ -1470,9 +1541,9 @@ xfs_dialloc_ag(
 	 * not, consider the agi hint or find the first free inode in the AG.
 	 */
 	if (pag->pag_agno == pagno)
-		error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
+		error = xfs_dialloc_ag_finobt_near(pag, pagino, &cur, &rec);
 	else
-		error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
+		error = xfs_dialloc_ag_finobt_newino(pag, agi, cur, &rec);
 	if (error)
 		goto error_cur;
 
-- 
2.31.1


      parent reply	other threads:[~2022-02-17 17:25 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-17 17:25 [PATCH RFC 0/4] xfs: track and skip realloc of busy inodes Brian Foster
2022-02-17 17:25 ` [PATCH RFC 1/4] xfs: require an rcu grace period before inode recycle Brian Foster
2022-02-17 17:25 ` [PATCH RFC 2/4] xfs: tag reclaimable inodes with pending RCU grace periods as busy Brian Foster
2022-02-17 23:16   ` Dave Chinner
2022-02-18 14:19     ` Brian Foster
2022-02-17 17:25 ` [PATCH RFC 3/4] xfs: crude chunk allocation retry mechanism Brian Foster
2022-02-17 23:20   ` Dave Chinner
2022-02-18 14:21     ` Brian Foster
2022-02-18 22:54       ` Dave Chinner
2022-02-20 18:48         ` Brian Foster
2022-02-23  7:00           ` Dave Chinner
2022-02-28 21:45             ` Brian Foster
2022-02-28 22:55               ` Dave Chinner
2022-03-01 15:05                 ` Brian Foster
2022-02-17 17:25 ` Brian Foster [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220217172518.3842951-5-bfoster@redhat.com \
    --to=bfoster@redhat.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.