All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gao Xiang <hsiangkao@redhat.com>
To: linux-xfs@vger.kernel.org
Cc: Dave Chinner <david@fromorbit.com>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	Brian Foster <bfoster@redhat.com>,
	Gao Xiang <hsiangkao@redhat.com>
Subject: [RFC PATCH v2 3/3] xfs: insert unlinked inodes from tail
Date: Fri, 24 Jul 2020 14:12:59 +0800	[thread overview]
Message-ID: <20200724061259.5519-4-hsiangkao@redhat.com> (raw)
In-Reply-To: <20200724061259.5519-1-hsiangkao@redhat.com>

Currently, AGI buffer is always touched since xfs_iunlink()
adds unlinked inodes from head unconditionally, but since we
now have the only one unlinked list and if we insert unlinked
inodes from tail instead and there're more than 1 inodes, AGI
buffer modification could be avoided.

In order to do that, let's keep track of the tail of unlinked
inode into the perag all the time in order for xfs_iunlink()
to use.

With this change, it shows that only 938 of 10000 operations
modifies the head of unlinked list with the following workload:
 seq 1 10000 | xargs touch
 find . | xargs -n3 -P100 rm

Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
 fs/xfs/xfs_inode.c       | 120 ++++++++++++++++++++++++---------------
 fs/xfs/xfs_log_recover.c |   5 ++
 fs/xfs/xfs_mount.c       |   1 +
 fs/xfs/xfs_mount.h       |   3 +
 4 files changed, 84 insertions(+), 45 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d78aaa8ce252..3cfd84b76955 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1986,6 +1986,38 @@ xfs_iunlink_update_bucket(
 	return 0;
 }
 
+/*
+ * Lock the perag and take AGI lock if agi_unlinked is touched as well
+ * for xfs_iunlink_insert_inode(). As for the details of locking order,
+ * refer to the comments of xfs_iunlink_remove_lock().
+ */
+static struct xfs_perag *
+xfs_iunlink_insert_lock(
+	xfs_agino_t		agno,
+	struct xfs_trans        *tp,
+	struct xfs_inode	*ip,
+	struct xfs_buf		**agibpp)
+{
+	struct xfs_mount        *mp = tp->t_mountp;
+	struct xfs_perag	*pag;
+	int			error;
+
+	pag = xfs_perag_get(mp, agno);
+	mutex_lock(&pag->pag_unlinked_mutex);
+
+	if (!pag->pag_unlinked_tail) {
+		mutex_unlock(&pag->pag_unlinked_mutex);
+
+		error = xfs_read_agi(mp, tp, agno, agibpp);
+		if (error) {
+			xfs_perag_put(pag);
+			return ERR_PTR(error);
+		}
+		mutex_lock(&pag->pag_unlinked_mutex);
+	}
+	return pag;
+}
+
 /*
  * Always insert at the head, so we only have to do a next inode lookup to
  * update it's prev pointer. The AGI bucket will point at the one we are
@@ -1993,50 +2025,47 @@ xfs_iunlink_update_bucket(
  */
 static int
 xfs_iunlink_insert_inode(
+	struct xfs_perag	*pag,
 	struct xfs_trans	*tp,
 	struct xfs_buf		*agibp,
 	struct xfs_inode	*ip)
 {
 	struct xfs_mount	*mp = tp->t_mountp;
-	struct xfs_agi		*agi = agibp->b_addr;
-	xfs_agino_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+	struct xfs_inode	*pip = pag->pag_unlinked_tail;
 	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
 	xfs_agino_t		next_agino;
 
-	/*
-	 * Get the index into the agi hash table for the list this inode will
-	 * go on.  Make sure the pointer isn't garbage and that this inode
-	 * isn't already on the list.
-	 */
-	next_agino = be32_to_cpu(agi->agi_unlinked[0]);
-	if (next_agino == agino ||
-	    !xfs_verify_agino_or_null(mp, agno, next_agino)) {
-		xfs_buf_mark_corrupt(agibp);
-		return -EFSCORRUPTED;
-	}
-
-	ip->i_prev_unlinked = NULLAGINO;
-	ip->i_next_unlinked = next_agino;
-	if (ip->i_next_unlinked != NULLAGINO) {
-		struct xfs_inode	*nip;
-
-		nip = xfs_iunlink_lookup_next(agibp->b_pag, ip);
-		if (IS_ERR_OR_NULL(nip))
-			return -EFSCORRUPTED;
+	if (!pip) {
+		xfs_agino_t	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+		struct xfs_agi	*agi = agibp->b_addr;
+		int		error;
 
-		if (nip->i_prev_unlinked != NULLAGINO) {
-			xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
-						NULL, 0, __this_address);
+		ip->i_prev_unlinked = NULLAGINO;
+		/*
+		 * Get the index into the agi hash table for the list this
+		 * inode will go on.  Make sure the pointer isn't garbage
+		 * and that this inode isn't already on the list.
+		 */
+		next_agino = be32_to_cpu(agi->agi_unlinked[0]);
+		if (next_agino == agino ||
+		    !xfs_verify_agino_or_null(mp, agno, next_agino)) {
+			xfs_buf_mark_corrupt(agibp);
 			return -EFSCORRUPTED;
 		}
-		nip->i_prev_unlinked = agino;
 
-		/* update the on disk inode now */
-		xfs_iunlink_log(tp, ip);
+		/* Point the head of the list to point to this inode. */
+		error = xfs_iunlink_update_bucket(tp, agno, agibp, 0, agino);
+		if (error)
+			return error;
+	} else {
+		ip->i_prev_unlinked = XFS_INO_TO_AGINO(mp, pip->i_ino);
+		ASSERT(pip->i_next_unlinked == NULLAGINO);
+		pip->i_next_unlinked = agino;
+		xfs_iunlink_log(tp, pip);
 	}
-
-	/* Point the head of the list to point to this inode. */
-	return xfs_iunlink_update_bucket(tp, agno, agibp, 0, agino);
+	ip->i_next_unlinked = NULLAGINO;
+	pag->pag_unlinked_tail = ip;
+	return 0;
 }
 
 /*
@@ -2095,6 +2124,7 @@ xfs_iunlink_remove_inode(
 	xfs_agino_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
 	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
 	xfs_agino_t		next_agino = ip->i_next_unlinked;
+	struct xfs_inode	*pip = NULL;
 	int			error;
 
 	if (ip->i_prev_unlinked == NULLAGINO) {
@@ -2122,8 +2152,6 @@ xfs_iunlink_remove_inode(
 			return -EFSCORRUPTED;
 	} else {
 		/* lookup previous inode and update to point at next */
-		struct xfs_inode	*pip;
-
 		pip = xfs_iunlink_lookup_prev(pag, ip);
 		if (IS_ERR_OR_NULL(pip))
 			return -EFSCORRUPTED;
@@ -2139,8 +2167,12 @@ xfs_iunlink_remove_inode(
 		xfs_iunlink_log(tp, pip);
 	}
 
-	/* lookup the next inode and update to point at prev */
-	if (ip->i_next_unlinked != NULLAGINO) {
+	if (next_agino == NULLAGINO) {
+		/* only care about the tail of bucket 0 for xfs_iunlink() */
+		if (pag->pag_unlinked_tail == ip)
+			pag->pag_unlinked_tail = pip;
+	} else {
+		/* lookup the next inode and update to point at prev */
 		struct xfs_inode	*nip;
 
 		nip = xfs_iunlink_lookup_next(pag, ip);
@@ -2188,23 +2220,21 @@ xfs_iunlink(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip)
 {
-	struct xfs_mount	*mp = tp->t_mountp;
-	struct xfs_buf		*agibp;
-	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+	struct xfs_buf		*agibp = NULL;
+	struct xfs_perag	*pag;
 	int			error;
 
 	ASSERT(VFS_I(ip)->i_nlink == 0);
 	ASSERT(VFS_I(ip)->i_mode != 0);
 	trace_xfs_iunlink(ip);
 
-	/* Get the agi buffer first.  It ensures lock ordering on the list. */
-	error = xfs_read_agi(mp, tp, agno, &agibp);
-	if (error)
-		return error;
+	pag = xfs_iunlink_insert_lock(XFS_INO_TO_AGNO(tp->t_mountp, ip->i_ino),
+		tp, ip, &agibp);
+	if (IS_ERR(pag))
+		return PTR_ERR(pag);
 
-	mutex_lock(&agibp->b_pag->pag_unlinked_mutex);
-	error = xfs_iunlink_insert_inode(tp, agibp, ip);
-	mutex_unlock(&agibp->b_pag->pag_unlinked_mutex);
+	error = xfs_iunlink_insert_inode(pag, tp, agibp, ip);
+	xfs_iunlink_unlock(pag);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index d47eea31c165..30198aea7335 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2801,6 +2801,11 @@ xlog_recover_iunlink_ag(
 					prev_ip->i_next_unlinked = NULLAGINO;
 				break;
 			}
+
+			/* XXX: take pag_unlinked_mutex across the loop? */
+			if (!bucket)
+				agibp->b_pag->pag_unlinked_tail = ip;
+
 			if (prev_ip) {
 				ip->i_prev_unlinked = prev_agino;
 				xfs_irele(prev_ip);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f94e14059e61..f1cd3e9c4da5 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -224,6 +224,7 @@ xfs_initialize_perag(
 			first_initialised = index;
 		spin_lock_init(&pag->pag_state_lock);
 		mutex_init(&pag->pag_unlinked_mutex);
+		pag->pag_unlinked_tail = NULL;
 	}
 
 	index = xfs_set_inode_alloc(mp, agcount);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9a1d0f239fa4..934e7c373042 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -375,6 +375,9 @@ typedef struct xfs_perag {
 	/* lock to protect unlinked inode list */
 	struct mutex            pag_unlinked_mutex;
 
+	/* point to the inode tail of AGI unlinked bucket 0 */
+	struct xfs_inode	*pag_unlinked_tail;
+
 	/*
 	 * Unlinked inode information.  This incore information reflects
 	 * data stored in the AGI, so callers must hold the AGI buffer lock
-- 
2.18.1


  parent reply	other threads:[~2020-07-24  6:14 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-07 13:57 [RFC PATCH 0/2] xfs: more unlinked inode list optimization v1 Gao Xiang
2020-07-07 13:57 ` [RFC PATCH 1/2] xfs: arrange all unlinked inodes into one list Gao Xiang
2020-07-08 22:33   ` Dave Chinner
2020-07-09  0:17     ` Gao Xiang
2020-07-07 13:57 ` [RFC PATCH 2/2] xfs: don't access AGI on unlinked inodes if it can Gao Xiang
2020-07-08 17:03   ` Darrick J. Wong
2020-07-08 23:40     ` Gao Xiang
2020-07-08 23:33   ` Dave Chinner
2020-07-09  0:55     ` Gao Xiang
2020-07-09  2:32       ` Dave Chinner
2020-07-09 10:36         ` Gao Xiang
2020-07-09 10:47           ` Gao Xiang
2020-07-09 22:36           ` Dave Chinner
2020-07-24  6:12 ` [RFC PATCH v2 0/3] xfs: more unlinked inode list optimization v2 Gao Xiang
2020-07-24  6:12   ` [RFC PATCH v2 1/3] xfs: arrange all unlinked inodes into one list Gao Xiang
2020-07-24  6:12   ` [RFC PATCH v2 2/3] xfs: introduce perag iunlink lock Gao Xiang
2020-07-24  6:12   ` Gao Xiang [this message]
2020-08-18 13:30   ` [RFC PATCH v4 0/3] xfs: more unlinked inode list optimization v4 Gao Xiang
2020-08-18 13:30     ` [RFC PATCH v4 1/3] xfs: get rid of unused pagi_unlinked_hash Gao Xiang
2020-08-19  0:54       ` Darrick J. Wong
2020-08-21  1:09         ` Dave Chinner
2020-08-18 13:30     ` [RFC PATCH v4 2/3] xfs: introduce perag iunlink lock Gao Xiang
2020-08-19  1:06       ` Darrick J. Wong
2020-08-19  1:23         ` Gao Xiang
2020-08-18 13:30     ` [RFC PATCH v4 3/3] xfs: insert unlinked inodes from tail Gao Xiang
2020-08-19  0:53     ` [RFC PATCH v4 0/3] xfs: more unlinked inode list optimization v4 Darrick J. Wong
2020-08-19  1:14       ` Gao Xiang
2020-08-20  2:46     ` Darrick J. Wong
2020-08-20  4:01       ` Gao Xiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200724061259.5519-4-hsiangkao@redhat.com \
    --to=hsiangkao@redhat.com \
    --cc=bfoster@redhat.com \
    --cc=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.