Linux-XFS Archive on lore.kernel.org
 help / color / Atom feed
From: Gao Xiang <hsiangkao@redhat.com>
To: linux-xfs@vger.kernel.org
Cc: Dave Chinner <david@fromorbit.com>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	Brian Foster <bfoster@redhat.com>,
	Gao Xiang <hsiangkao@redhat.com>
Subject: [RFC PATCH v4 3/3] xfs: insert unlinked inodes from tail
Date: Tue, 18 Aug 2020 21:30:15 +0800
Message-ID: <20200818133015.25398-4-hsiangkao@redhat.com> (raw)
In-Reply-To: <20200818133015.25398-1-hsiangkao@redhat.com>

Currently, AGI buffer is always touched since xfs_iunlink()
adds unlinked inodes from head unconditionally, but since we
have the only one unlinked list now and if we insert unlinked
inodes from tail instead and there're more than 1 inode, AGI
buffer could be untouched.

With this change, it shows that only 938 of 10000 operations
modifies the head of unlinked list with the following workload:
 seq 1 10000 | xargs touch
 find . | xargs -n3 -P100 rm

Note that xfs_iunlink_insert_lock() is slightly different from
xfs_iunlink_remove_lock() due to whiteout path, refer to inlined
comments for more details.

Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
 fs/xfs/xfs_inode.c | 99 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 74 insertions(+), 25 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f32a1172b5cd..0add263d21a8 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1975,30 +1975,88 @@ xfs_iunlink_update_bucket(
 
 static int
 xfs_iunlink_insert_inode(
+	struct xfs_perag	*pag,
 	struct xfs_trans	*tp,
 	struct xfs_buf		*agibp,
 	struct xfs_inode	*ip)
 {
 	struct xfs_mount	*mp = tp->t_mountp;
-	struct xfs_inode	*nip;
-	xfs_agino_t		next_agino = NULLAGINO;
 	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
 	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
 
-	nip = list_first_entry_or_null(&agibp->b_pag->pag_ici_unlink_list,
-					struct xfs_inode, i_unlink);
-	if (nip) {
+	if (!list_empty(&pag->pag_ici_unlink_list)) {
+		struct xfs_inode	*pip;
 
 		/*
-		 * There is already another inode in the bucket, so point this
-		 * inode to the current head of the list.
+		 * There is already another inode in the bucket, so point
+		 * the last inode to this inode.
 		 */
-		next_agino = XFS_INO_TO_AGINO(mp, nip->i_ino);
-		xfs_iunlink_log(tp, ip, NULLAGINO, next_agino);
+		pip = list_last_entry(&pag->pag_ici_unlink_list,
+				struct xfs_inode, i_unlink);
+		xfs_iunlink_log(tp, pip, NULLAGINO, agino);
+		return 0;
 	}
 
+	ASSERT(agibp);
 	/* Point the head of the list to point to this inode. */
-	return xfs_iunlink_update_bucket(tp, agno, agibp, next_agino, agino);
+	return xfs_iunlink_update_bucket(tp, agno, agibp, NULLAGINO, agino);
+}
+
+/*
+ * Lock the perag and take AGI lock if agi_unlinked is touched as well
+ * for xfs_iunlink_insert_inode(). As for the details of locking order,
+ * refer to the comments of xfs_iunlink_remove_lock().
+ */
+static struct xfs_perag *
+xfs_iunlink_insert_lock(
+	xfs_agino_t		agno,
+	struct xfs_trans        *tp,
+	struct xfs_inode	*ip,
+	struct xfs_buf		**agibpp)
+{
+	struct xfs_mount        *mp = tp->t_mountp;
+	bool			locked = true;
+	struct xfs_perag	*pag;
+	int			error;
+
+	pag = xfs_perag_get(mp, agno);
+	/* paired with smp_store_release() in xfs_iunlink_unlock() */
+	if (smp_load_acquire(&pag->pag_iunlink_trans) == tp) {
+		/*
+		 * if pag_iunlink_trans is the current trans, we're
+		 * in the current process context, so it's safe here.
+		 */
+		ASSERT(mutex_is_locked(&pag->pag_iunlink_mutex));
+		/*
+		 * slightly different from xfs_iunlink_remove_lock(),
+		 * the path of xfs_iunlink_remove() and then xfs_iunlink()
+		 * on the same AG needs to be considered (e.g. whiteout
+		 * rename), so lock AGI first in xfs_iunlink_remove(),
+		 * and recursively get AGI safely below.
+		 */
+		if (!list_empty(&pag->pag_ici_unlink_list))
+			goto out;
+	} else {
+		mutex_lock(&pag->pag_iunlink_mutex);
+		if (!list_empty(&pag->pag_ici_unlink_list))
+			goto out;
+		mutex_unlock(&pag->pag_iunlink_mutex);
+		locked = false;
+	}
+
+	/* and see comments in xfs_iunlink_remove_lock() on locking order */
+	error = xfs_read_agi(mp, tp, agno, agibpp);
+	if (error) {
+		xfs_perag_put(pag);
+		return ERR_PTR(error);
+	}
+
+	if (!locked)
+		mutex_lock(&pag->pag_iunlink_mutex);
+out:
+	WRITE_ONCE(pag->pag_iunlink_trans, tp);
+	++pag->pag_iunlink_refcnt;
+	return pag;
 }
 
 void
@@ -2026,7 +2084,7 @@ xfs_iunlink(
 	struct xfs_inode	*ip)
 {
 	struct xfs_mount	*mp = tp->t_mountp;
-	struct xfs_buf		*agibp;
+	struct xfs_buf		*agibp = NULL;
 	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
 	int			error;
 	struct xfs_perag	*pag;
@@ -2035,18 +2093,9 @@ xfs_iunlink(
 	ASSERT(VFS_I(ip)->i_mode != 0);
 	trace_xfs_iunlink(ip);
 
-	/* Get the agi buffer first.  It ensures lock ordering on the list. */
-	error = xfs_read_agi(mp, tp, agno, &agibp);
-	if (error)
-		return error;
-
-	/* XXX: will be shortly removed instead in the next commit. */
-	pag = xfs_perag_get(mp, agno);
-	/* paired with smp_store_release() in xfs_iunlink_unlock() */
-	if (smp_load_acquire(&pag->pag_iunlink_trans) != tp)
-		mutex_lock(&pag->pag_iunlink_mutex);
-	WRITE_ONCE(pag->pag_iunlink_trans, tp);
-	++pag->pag_iunlink_refcnt;
+	pag = xfs_iunlink_insert_lock(agno, tp, ip, &agibp);
+	if (IS_ERR(pag))
+		return PTR_ERR(pag);
 
 	/*
 	 * Insert the inode into the on disk unlinked list, and if that
@@ -2054,9 +2103,9 @@ xfs_iunlink(
 	 * order so that the modifications required to the on disk list are not
 	 * impacted by already having this inode in the list.
 	 */
-	error = xfs_iunlink_insert_inode(tp, agibp, ip);
+	error = xfs_iunlink_insert_inode(pag, tp, agibp, ip);
 	if (!error)
-		list_add(&ip->i_unlink, &agibp->b_pag->pag_ici_unlink_list);
+		list_add_tail(&ip->i_unlink, &pag->pag_ici_unlink_list);
 
 	xfs_iunlink_unlock(pag);
 	return error;
-- 
2.18.1


  parent reply index

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-07 13:57 [RFC PATCH 0/2] xfs: more unlinked inode list optimization v1 Gao Xiang
2020-07-07 13:57 ` [RFC PATCH 1/2] xfs: arrange all unlinked inodes into one list Gao Xiang
2020-07-08 22:33   ` Dave Chinner
2020-07-09  0:17     ` Gao Xiang
2020-07-07 13:57 ` [RFC PATCH 2/2] xfs: don't access AGI on unlinked inodes if it can Gao Xiang
2020-07-08 17:03   ` Darrick J. Wong
2020-07-08 23:40     ` Gao Xiang
2020-07-08 23:33   ` Dave Chinner
2020-07-09  0:55     ` Gao Xiang
2020-07-09  2:32       ` Dave Chinner
2020-07-09 10:36         ` Gao Xiang
2020-07-09 10:47           ` Gao Xiang
2020-07-09 22:36           ` Dave Chinner
2020-07-24  6:12 ` [RFC PATCH v2 0/3] xfs: more unlinked inode list optimization v2 Gao Xiang
2020-07-24  6:12   ` [RFC PATCH v2 1/3] xfs: arrange all unlinked inodes into one list Gao Xiang
2020-07-24  6:12   ` [RFC PATCH v2 2/3] xfs: introduce perag iunlink lock Gao Xiang
2020-07-24  6:12   ` [RFC PATCH v2 3/3] xfs: insert unlinked inodes from tail Gao Xiang
2020-08-18 13:30   ` [RFC PATCH v4 0/3] xfs: more unlinked inode list optimization v4 Gao Xiang
2020-08-18 13:30     ` [RFC PATCH v4 1/3] xfs: get rid of unused pagi_unlinked_hash Gao Xiang
2020-08-19  0:54       ` Darrick J. Wong
2020-08-21  1:09         ` Dave Chinner
2020-08-18 13:30     ` [RFC PATCH v4 2/3] xfs: introduce perag iunlink lock Gao Xiang
2020-08-19  1:06       ` Darrick J. Wong
2020-08-19  1:23         ` Gao Xiang
2020-08-18 13:30     ` Gao Xiang [this message]
2020-08-19  0:53     ` [RFC PATCH v4 0/3] xfs: more unlinked inode list optimization v4 Darrick J. Wong
2020-08-19  1:14       ` Gao Xiang
2020-08-20  2:46     ` Darrick J. Wong
2020-08-20  4:01       ` Gao Xiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200818133015.25398-4-hsiangkao@redhat.com \
    --to=hsiangkao@redhat.com \
    --cc=bfoster@redhat.com \
    --cc=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-XFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-xfs/0 linux-xfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-xfs linux-xfs/ https://lore.kernel.org/linux-xfs \
		linux-xfs@vger.kernel.org
	public-inbox-index linux-xfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-xfs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git