Linux-XFS Archive on lore.kernel.org
 help / color / Atom feed
From: Gao Xiang <hsiangkao@redhat.com>
To: linux-xfs@vger.kernel.org
Cc: Dave Chinner <david@fromorbit.com>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	Brian Foster <bfoster@redhat.com>,
	Gao Xiang <hsiangkao@redhat.com>
Subject: [RFC PATCH v2 3/3] xfs: insert unlinked inodes from tail
Date: Fri, 24 Jul 2020 14:12:59 +0800
Message-ID: <20200724061259.5519-4-hsiangkao@redhat.com> (raw)
In-Reply-To: <20200724061259.5519-1-hsiangkao@redhat.com>

Currently, AGI buffer is always touched since xfs_iunlink()
adds unlinked inodes from head unconditionally, but since we
now have the only one unlinked list and if we insert unlinked
inodes from tail instead and there're more than 1 inodes, AGI
buffer modification could be avoided.

In order to do that, let's keep track of the tail of unlinked
inode into the perag all the time in order for xfs_iunlink()
to use.

With this change, it shows that only 938 of 10000 operations
modifies the head of unlinked list with the following workload:
 seq 1 10000 | xargs touch
 find . | xargs -n3 -P100 rm

Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
 fs/xfs/xfs_inode.c       | 120 ++++++++++++++++++++++++---------------
 fs/xfs/xfs_log_recover.c |   5 ++
 fs/xfs/xfs_mount.c       |   1 +
 fs/xfs/xfs_mount.h       |   3 +
 4 files changed, 84 insertions(+), 45 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d78aaa8ce252..3cfd84b76955 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1986,6 +1986,38 @@ xfs_iunlink_update_bucket(
 	return 0;
 }
 
+/*
+ * Lock the perag and take AGI lock if agi_unlinked is touched as well
+ * for xfs_iunlink_insert_inode(). As for the details of locking order,
+ * refer to the comments of xfs_iunlink_remove_lock().
+ */
+static struct xfs_perag *
+xfs_iunlink_insert_lock(
+	xfs_agino_t		agno,
+	struct xfs_trans        *tp,
+	struct xfs_inode	*ip,
+	struct xfs_buf		**agibpp)
+{
+	struct xfs_mount        *mp = tp->t_mountp;
+	struct xfs_perag	*pag;
+	int			error;
+
+	pag = xfs_perag_get(mp, agno);
+	mutex_lock(&pag->pag_unlinked_mutex);
+
+	if (!pag->pag_unlinked_tail) {
+		mutex_unlock(&pag->pag_unlinked_mutex);
+
+		error = xfs_read_agi(mp, tp, agno, agibpp);
+		if (error) {
+			xfs_perag_put(pag);
+			return ERR_PTR(error);
+		}
+		mutex_lock(&pag->pag_unlinked_mutex);
+	}
+	return pag;
+}
+
 /*
  * Always insert at the head, so we only have to do a next inode lookup to
  * update it's prev pointer. The AGI bucket will point at the one we are
@@ -1993,50 +2025,47 @@ xfs_iunlink_update_bucket(
  */
 static int
 xfs_iunlink_insert_inode(
+	struct xfs_perag	*pag,
 	struct xfs_trans	*tp,
 	struct xfs_buf		*agibp,
 	struct xfs_inode	*ip)
 {
 	struct xfs_mount	*mp = tp->t_mountp;
-	struct xfs_agi		*agi = agibp->b_addr;
-	xfs_agino_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+	struct xfs_inode	*pip = pag->pag_unlinked_tail;
 	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
 	xfs_agino_t		next_agino;
 
-	/*
-	 * Get the index into the agi hash table for the list this inode will
-	 * go on.  Make sure the pointer isn't garbage and that this inode
-	 * isn't already on the list.
-	 */
-	next_agino = be32_to_cpu(agi->agi_unlinked[0]);
-	if (next_agino == agino ||
-	    !xfs_verify_agino_or_null(mp, agno, next_agino)) {
-		xfs_buf_mark_corrupt(agibp);
-		return -EFSCORRUPTED;
-	}
-
-	ip->i_prev_unlinked = NULLAGINO;
-	ip->i_next_unlinked = next_agino;
-	if (ip->i_next_unlinked != NULLAGINO) {
-		struct xfs_inode	*nip;
-
-		nip = xfs_iunlink_lookup_next(agibp->b_pag, ip);
-		if (IS_ERR_OR_NULL(nip))
-			return -EFSCORRUPTED;
+	if (!pip) {
+		xfs_agino_t	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+		struct xfs_agi	*agi = agibp->b_addr;
+		int		error;
 
-		if (nip->i_prev_unlinked != NULLAGINO) {
-			xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
-						NULL, 0, __this_address);
+		ip->i_prev_unlinked = NULLAGINO;
+		/*
+		 * Get the index into the agi hash table for the list this
+		 * inode will go on.  Make sure the pointer isn't garbage
+		 * and that this inode isn't already on the list.
+		 */
+		next_agino = be32_to_cpu(agi->agi_unlinked[0]);
+		if (next_agino == agino ||
+		    !xfs_verify_agino_or_null(mp, agno, next_agino)) {
+			xfs_buf_mark_corrupt(agibp);
 			return -EFSCORRUPTED;
 		}
-		nip->i_prev_unlinked = agino;
 
-		/* update the on disk inode now */
-		xfs_iunlink_log(tp, ip);
+		/* Point the head of the list to point to this inode. */
+		error = xfs_iunlink_update_bucket(tp, agno, agibp, 0, agino);
+		if (error)
+			return error;
+	} else {
+		ip->i_prev_unlinked = XFS_INO_TO_AGINO(mp, pip->i_ino);
+		ASSERT(pip->i_next_unlinked == NULLAGINO);
+		pip->i_next_unlinked = agino;
+		xfs_iunlink_log(tp, pip);
 	}
-
-	/* Point the head of the list to point to this inode. */
-	return xfs_iunlink_update_bucket(tp, agno, agibp, 0, agino);
+	ip->i_next_unlinked = NULLAGINO;
+	pag->pag_unlinked_tail = ip;
+	return 0;
 }
 
 /*
@@ -2095,6 +2124,7 @@ xfs_iunlink_remove_inode(
 	xfs_agino_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
 	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
 	xfs_agino_t		next_agino = ip->i_next_unlinked;
+	struct xfs_inode	*pip = NULL;
 	int			error;
 
 	if (ip->i_prev_unlinked == NULLAGINO) {
@@ -2122,8 +2152,6 @@ xfs_iunlink_remove_inode(
 			return -EFSCORRUPTED;
 	} else {
 		/* lookup previous inode and update to point at next */
-		struct xfs_inode	*pip;
-
 		pip = xfs_iunlink_lookup_prev(pag, ip);
 		if (IS_ERR_OR_NULL(pip))
 			return -EFSCORRUPTED;
@@ -2139,8 +2167,12 @@ xfs_iunlink_remove_inode(
 		xfs_iunlink_log(tp, pip);
 	}
 
-	/* lookup the next inode and update to point at prev */
-	if (ip->i_next_unlinked != NULLAGINO) {
+	if (next_agino == NULLAGINO) {
+		/* only care about the tail of bucket 0 for xfs_iunlink() */
+		if (pag->pag_unlinked_tail == ip)
+			pag->pag_unlinked_tail = pip;
+	} else {
+		/* lookup the next inode and update to point at prev */
 		struct xfs_inode	*nip;
 
 		nip = xfs_iunlink_lookup_next(pag, ip);
@@ -2188,23 +2220,21 @@ xfs_iunlink(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip)
 {
-	struct xfs_mount	*mp = tp->t_mountp;
-	struct xfs_buf		*agibp;
-	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+	struct xfs_buf		*agibp = NULL;
+	struct xfs_perag	*pag;
 	int			error;
 
 	ASSERT(VFS_I(ip)->i_nlink == 0);
 	ASSERT(VFS_I(ip)->i_mode != 0);
 	trace_xfs_iunlink(ip);
 
-	/* Get the agi buffer first.  It ensures lock ordering on the list. */
-	error = xfs_read_agi(mp, tp, agno, &agibp);
-	if (error)
-		return error;
+	pag = xfs_iunlink_insert_lock(XFS_INO_TO_AGNO(tp->t_mountp, ip->i_ino),
+		tp, ip, &agibp);
+	if (IS_ERR(pag))
+		return PTR_ERR(pag);
 
-	mutex_lock(&agibp->b_pag->pag_unlinked_mutex);
-	error = xfs_iunlink_insert_inode(tp, agibp, ip);
-	mutex_unlock(&agibp->b_pag->pag_unlinked_mutex);
+	error = xfs_iunlink_insert_inode(pag, tp, agibp, ip);
+	xfs_iunlink_unlock(pag);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index d47eea31c165..30198aea7335 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2801,6 +2801,11 @@ xlog_recover_iunlink_ag(
 					prev_ip->i_next_unlinked = NULLAGINO;
 				break;
 			}
+
+			/* XXX: take pag_unlinked_mutex across the loop? */
+			if (!bucket)
+				agibp->b_pag->pag_unlinked_tail = ip;
+
 			if (prev_ip) {
 				ip->i_prev_unlinked = prev_agino;
 				xfs_irele(prev_ip);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f94e14059e61..f1cd3e9c4da5 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -224,6 +224,7 @@ xfs_initialize_perag(
 			first_initialised = index;
 		spin_lock_init(&pag->pag_state_lock);
 		mutex_init(&pag->pag_unlinked_mutex);
+		pag->pag_unlinked_tail = NULL;
 	}
 
 	index = xfs_set_inode_alloc(mp, agcount);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9a1d0f239fa4..934e7c373042 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -375,6 +375,9 @@ typedef struct xfs_perag {
 	/* lock to protect unlinked inode list */
 	struct mutex            pag_unlinked_mutex;
 
+	/* point to the inode tail of AGI unlinked bucket 0 */
+	struct xfs_inode	*pag_unlinked_tail;
+
 	/*
 	 * Unlinked inode information.  This incore information reflects
 	 * data stored in the AGI, so callers must hold the AGI buffer lock
-- 
2.18.1


  parent reply index

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-07 13:57 [RFC PATCH 0/2] xfs: more unlinked inode list optimization v1 Gao Xiang
2020-07-07 13:57 ` [RFC PATCH 1/2] xfs: arrange all unlinked inodes into one list Gao Xiang
2020-07-08 22:33   ` Dave Chinner
2020-07-09  0:17     ` Gao Xiang
2020-07-07 13:57 ` [RFC PATCH 2/2] xfs: don't access AGI on unlinked inodes if it can Gao Xiang
2020-07-08 17:03   ` Darrick J. Wong
2020-07-08 23:40     ` Gao Xiang
2020-07-08 23:33   ` Dave Chinner
2020-07-09  0:55     ` Gao Xiang
2020-07-09  2:32       ` Dave Chinner
2020-07-09 10:36         ` Gao Xiang
2020-07-09 10:47           ` Gao Xiang
2020-07-09 22:36           ` Dave Chinner
2020-07-24  6:12 ` [RFC PATCH v2 0/3] xfs: more unlinked inode list optimization v2 Gao Xiang
2020-07-24  6:12   ` [RFC PATCH v2 1/3] xfs: arrange all unlinked inodes into one list Gao Xiang
2020-07-24  6:12   ` [RFC PATCH v2 2/3] xfs: introduce perag iunlink lock Gao Xiang
2020-07-24  6:12   ` Gao Xiang [this message]
2020-08-18 13:30   ` [RFC PATCH v4 0/3] xfs: more unlinked inode list optimization v4 Gao Xiang
2020-08-18 13:30     ` [RFC PATCH v4 1/3] xfs: get rid of unused pagi_unlinked_hash Gao Xiang
2020-08-19  0:54       ` Darrick J. Wong
2020-08-21  1:09         ` Dave Chinner
2020-08-18 13:30     ` [RFC PATCH v4 2/3] xfs: introduce perag iunlink lock Gao Xiang
2020-08-19  1:06       ` Darrick J. Wong
2020-08-19  1:23         ` Gao Xiang
2020-08-18 13:30     ` [RFC PATCH v4 3/3] xfs: insert unlinked inodes from tail Gao Xiang
2020-08-19  0:53     ` [RFC PATCH v4 0/3] xfs: more unlinked inode list optimization v4 Darrick J. Wong
2020-08-19  1:14       ` Gao Xiang
2020-08-20  2:46     ` Darrick J. Wong
2020-08-20  4:01       ` Gao Xiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200724061259.5519-4-hsiangkao@redhat.com \
    --to=hsiangkao@redhat.com \
    --cc=bfoster@redhat.com \
    --cc=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-XFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-xfs/0 linux-xfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-xfs linux-xfs/ https://lore.kernel.org/linux-xfs \
		linux-xfs@vger.kernel.org
	public-inbox-index linux-xfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-xfs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git