Linux-XFS Archive on lore.kernel.org
 help / color / Atom feed
From: Gao Xiang <hsiangkao@redhat.com>
To: linux-xfs@vger.kernel.org
Cc: Dave Chinner <david@fromorbit.com>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	Brian Foster <bfoster@redhat.com>,
	Gao Xiang <hsiangkao@redhat.com>
Subject: [RFC PATCH v4 2/3] xfs: introduce perag iunlink lock
Date: Tue, 18 Aug 2020 21:30:14 +0800
Message-ID: <20200818133015.25398-3-hsiangkao@redhat.com> (raw)
In-Reply-To: <20200818133015.25398-1-hsiangkao@redhat.com>

Currently, we use AGI buffer lock to protect in-memory linked
list for unlinked inodes but since it's not necessary to modify
AGI unless the head of the unlinked list is modified.

Therefore, let's add another per-AG dedicated lock to protect
the whole list, and get rid of taking AGI buffer lock in
xfs_iunlink_remove() if the head is untouched.

Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
 fs/xfs/xfs_inode.c        | 111 +++++++++++++++++++++++++++++++++-----
 fs/xfs/xfs_inode.h        |   1 +
 fs/xfs/xfs_iunlink_item.c |  16 ++++++
 fs/xfs/xfs_mount.c        |   4 ++
 fs/xfs/xfs_mount.h        |   9 ++++
 5 files changed, 128 insertions(+), 13 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7ee778bcde06..f32a1172b5cd 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -47,7 +47,7 @@ kmem_zone_t *xfs_inode_zone;
 #define	XFS_ITRUNC_MAX_EXTENTS	2
 
 STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
-STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *);
+STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *, bool);
 
 /*
  * helper function to extract extent size hint from inode
@@ -1398,7 +1398,7 @@ xfs_link(
 	 * Handle initial link state of O_TMPFILE inode
 	 */
 	if (VFS_I(sip)->i_nlink == 0) {
-		error = xfs_iunlink_remove(tp, sip);
+		error = xfs_iunlink_remove(tp, sip, false);
 		if (error)
 			goto error_return;
 	}
@@ -2001,6 +2001,18 @@ xfs_iunlink_insert_inode(
 	return xfs_iunlink_update_bucket(tp, agno, agibp, next_agino, agino);
 }
 
+void
+xfs_iunlink_unlock(
+	struct xfs_perag	*pag)
+{
+	/* Does not unlock AGI, ever. xfs_trans_commit() does that. */
+	if (!--pag->pag_iunlink_refcnt) {
+		smp_store_release(&pag->pag_iunlink_trans, NULL);
+		mutex_unlock(&pag->pag_iunlink_mutex);
+	}
+	xfs_perag_put(pag);
+}
+
 /*
  * This is called when the inode's link count has gone to 0 or we are creating
  * a tmpfile via O_TMPFILE.  The inode @ip must have nlink == 0.
@@ -2017,6 +2029,7 @@ xfs_iunlink(
 	struct xfs_buf		*agibp;
 	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
 	int			error;
+	struct xfs_perag	*pag;
 
 	ASSERT(VFS_I(ip)->i_nlink == 0);
 	ASSERT(VFS_I(ip)->i_mode != 0);
@@ -2027,6 +2040,14 @@ xfs_iunlink(
 	if (error)
 		return error;
 
+	/* XXX: will be shortly removed instead in the next commit. */
+	pag = xfs_perag_get(mp, agno);
+	/* paired with smp_store_release() in xfs_iunlink_unlock() */
+	if (smp_load_acquire(&pag->pag_iunlink_trans) != tp)
+		mutex_lock(&pag->pag_iunlink_mutex);
+	WRITE_ONCE(pag->pag_iunlink_trans, tp);
+	++pag->pag_iunlink_refcnt;
+
 	/*
 	 * Insert the inode into the on disk unlinked list, and if that
 	 * succeeds, then insert it into the in memory list. We do it in this
@@ -2037,11 +2058,72 @@ xfs_iunlink(
 	if (!error)
 		list_add(&ip->i_unlink, &agibp->b_pag->pag_ici_unlink_list);
 
+	xfs_iunlink_unlock(pag);
 	return error;
 }
 
+/*
+ * Lock the perag and take AGI lock if agi_unlinked is touched as well for
+ * xfs_iunlink_remove_inode().
+ *
+ * Inode allocation in the O_TMPFILE path defines the AGI/unlinked list lock
+ * order as being AGI->perag unlinked list lock. We are inverting it here as
+ * the fast path tail addition does not need to modify the AGI at all. Hence
+ * AGI lock is only needed if the head is modified, correct locking order.
+ */
+static struct xfs_perag *
+xfs_iunlink_remove_lock(
+	xfs_agino_t		agno,
+	struct xfs_trans        *tp,
+	struct xfs_inode	*ip,
+	struct xfs_buf		**agibpp,
+	bool			force_agi)
+{
+	struct xfs_mount        *mp = tp->t_mountp;
+	struct xfs_perag	*pag;
+	int			error;
+
+	pag = xfs_perag_get(mp, agno);
+	/* paired with smp_store_release() in xfs_iunlink_unlock() */
+	if (smp_load_acquire(&pag->pag_iunlink_trans) == tp) {
+		/*
+		 * if pag_iunlink_trans is the current trans, we're
+		 * in the current process context, so it's safe here.
+		 */
+		ASSERT(mutex_is_locked(&pag->pag_iunlink_mutex));
+		goto out;
+	}
+
+	if (!force_agi) {
+		mutex_lock(&pag->pag_iunlink_mutex);
+		if (ip != list_first_entry(&pag->pag_ici_unlink_list,
+				struct xfs_inode, i_unlink))
+			goto out;
+		mutex_unlock(&pag->pag_iunlink_mutex);
+	}
+
+	/*
+	 * some paths (e.g. xfs_create_tmpfile) could take AGI lock in
+	 * this transaction in advance and we should keep the proper
+	 * locking order: AGI buf lock and then pag_iunlink_mutex.
+	 */
+	error = xfs_read_agi(mp, tp, agno, agibpp);
+	if (error) {
+		xfs_perag_put(pag);
+		return ERR_PTR(error);
+	}
+
+	mutex_lock(&pag->pag_iunlink_mutex);
+	ASSERT(!pag->pag_iunlink_refcnt);
+out:
+	WRITE_ONCE(pag->pag_iunlink_trans, tp);
+	++pag->pag_iunlink_refcnt;
+	return pag;
+}
+
 static int
 xfs_iunlink_remove_inode(
+	struct xfs_perag	*pag,
 	struct xfs_trans	*tp,
 	struct xfs_buf		*agibp,
 	struct xfs_inode	*ip)
@@ -2055,7 +2137,7 @@ xfs_iunlink_remove_inode(
 	 * Get the next agino in the list. If we are at the end of the list,
 	 * then the previous inode's i_next_unlinked filed will get cleared.
 	 */
-	if (ip != list_last_entry(&agibp->b_pag->pag_ici_unlink_list,
+	if (ip != list_last_entry(&pag->pag_ici_unlink_list,
 					struct xfs_inode, i_unlink)) {
 		struct xfs_inode *nip = list_next_entry(ip, i_unlink);
 
@@ -2065,7 +2147,7 @@ xfs_iunlink_remove_inode(
 	/* Clear the on disk next unlinked pointer for this inode. */
 	xfs_iunlink_log(tp, ip, next_agino, NULLAGINO);
 
-	if (ip != list_first_entry(&agibp->b_pag->pag_ici_unlink_list,
+	if (ip != list_first_entry(&pag->pag_ici_unlink_list,
 					struct xfs_inode, i_unlink)) {
 		struct xfs_inode *pip = list_prev_entry(ip, i_unlink);
 
@@ -2073,6 +2155,7 @@ xfs_iunlink_remove_inode(
 		return 0;
 	}
 
+	ASSERT(agibp);
 	/* Point the head of the list to the next unlinked inode. */
 	return xfs_iunlink_update_bucket(tp, agno, agibp, agino, next_agino);
 }
@@ -2083,27 +2166,29 @@ xfs_iunlink_remove_inode(
 STATIC int
 xfs_iunlink_remove(
 	struct xfs_trans	*tp,
-	struct xfs_inode	*ip)
+	struct xfs_inode	*ip,
+	bool			force_agi)
 {
 	struct xfs_mount	*mp = tp->t_mountp;
-	struct xfs_buf		*agibp;
+	struct xfs_buf		*agibp = NULL;
 	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+	struct xfs_perag	*pag;
 	int			error;
 
 	trace_xfs_iunlink_remove(ip);
 
-	/* Get the agi buffer first.  It ensures lock ordering on the list. */
-	error = xfs_read_agi(mp, tp, agno, &agibp);
-	if (error)
-		return error;
+	pag = xfs_iunlink_remove_lock(agno, tp, ip, &agibp, force_agi);
+	if (IS_ERR(pag))
+		return PTR_ERR(pag);
 
 	/*
 	 * Remove the inode from the on-disk list and then remove it from the
 	 * in-memory list. This order of operations ensures we can look up both
 	 * next and previous inode in the on-disk list via the in-memory list.
 	 */
-	error = xfs_iunlink_remove_inode(tp, agibp, ip);
+	error = xfs_iunlink_remove_inode(pag, tp, agibp, ip);
 	list_del(&ip->i_unlink);
+	xfs_iunlink_unlock(pag);
 	return error;
 }
 
@@ -2316,7 +2401,7 @@ xfs_ifree(
 	if (error)
 		return error;
 
-	error = xfs_iunlink_remove(tp, ip);
+	error = xfs_iunlink_remove(tp, ip, false);
 	if (error)
 		return error;
 
@@ -2895,7 +2980,7 @@ xfs_rename(
 	 */
 	if (wip) {
 		ASSERT(VFS_I(wip)->i_nlink == 0);
-		error = xfs_iunlink_remove(tp, wip);
+		error = xfs_iunlink_remove(tp, wip, true);
 		if (error)
 			goto out_trans_cancel;
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 7f8fbb7c8594..d0a221af71db 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -468,5 +468,6 @@ void xfs_end_io(struct work_struct *work);
 
 int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
 void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
+void xfs_iunlink_unlock(struct xfs_perag *pag);
 
 #endif	/* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_iunlink_item.c b/fs/xfs/xfs_iunlink_item.c
index 2ee05f98aa97..ae1e73e465b2 100644
--- a/fs/xfs/xfs_iunlink_item.c
+++ b/fs/xfs/xfs_iunlink_item.c
@@ -16,6 +16,7 @@
 #include "xfs_iunlink_item.h"
 #include "xfs_trace.h"
 #include "xfs_error.h"
+#include "xfs_sb.h"
 
 struct kmem_cache	*xfs_iunlink_zone;
 
@@ -28,6 +29,13 @@ static void
 xfs_iunlink_item_release(
 	struct xfs_log_item	*lip)
 {
+	struct xfs_mount	*mp = lip->li_mountp;
+	struct xfs_inode        *ip = IUL_ITEM(lip)->iu_ip;
+	struct xfs_perag	*pag;
+
+	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+	ASSERT(mutex_is_locked(&pag->pag_iunlink_mutex));
+	xfs_iunlink_unlock(pag);
 	kmem_cache_free(xfs_iunlink_zone, IUL_ITEM(lip));
 }
 
@@ -150,7 +158,9 @@ xfs_iunlink_log(
 	xfs_agino_t		old_agino,
 	xfs_agino_t		next_agino)
 {
+	struct xfs_mount        *mp = tp->t_mountp;
 	struct xfs_iunlink_item	*iup;
+	struct xfs_perag	*pag;
 
 	iup = kmem_cache_zalloc(xfs_iunlink_zone, GFP_KERNEL | __GFP_NOFAIL);
 
@@ -164,5 +174,11 @@ xfs_iunlink_log(
 	xfs_trans_add_item(tp, &iup->iu_item);
 	tp->t_flags |= XFS_TRANS_DIRTY;
 	set_bit(XFS_LI_DIRTY, &iup->iu_item.li_flags);
+
+	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+	ASSERT(mutex_is_locked(&pag->pag_iunlink_mutex));
+	ASSERT(pag->pag_iunlink_trans == tp);
+	++pag->pag_iunlink_refcnt;
+	xfs_perag_put(pag);
 }
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f28c969af272..82d264a3350d 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -224,6 +224,10 @@ xfs_initialize_perag(
 		if (first_initialised == NULLAGNUMBER)
 			first_initialised = index;
 		spin_lock_init(&pag->pag_state_lock);
+
+		mutex_init(&pag->pag_iunlink_mutex);
+		pag->pag_iunlink_refcnt = 0;
+		pag->pag_iunlink_trans = NULL;
 	}
 
 	index = xfs_set_inode_alloc(mp, agcount);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 98109801a995..fca4c1d28d8e 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -372,6 +372,15 @@ typedef struct xfs_perag {
 
 	/* reference count */
 	uint8_t			pagf_refcount_level;
+
+	/* lock to protect unlinked inode list and refcnt */
+	struct mutex            pag_iunlink_mutex;
+
+	/* recursive count of pag_iunlink_mutex */
+	unsigned int		pag_iunlink_refcnt;
+
+	/* (lockless) by which pag_iunlink_mutex is taken */
+	struct xfs_trans	*pag_iunlink_trans;
 } xfs_perag_t;
 
 static inline struct xfs_ag_resv *
-- 
2.18.1


  parent reply index

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-07 13:57 [RFC PATCH 0/2] xfs: more unlinked inode list optimization v1 Gao Xiang
2020-07-07 13:57 ` [RFC PATCH 1/2] xfs: arrange all unlinked inodes into one list Gao Xiang
2020-07-08 22:33   ` Dave Chinner
2020-07-09  0:17     ` Gao Xiang
2020-07-07 13:57 ` [RFC PATCH 2/2] xfs: don't access AGI on unlinked inodes if it can Gao Xiang
2020-07-08 17:03   ` Darrick J. Wong
2020-07-08 23:40     ` Gao Xiang
2020-07-08 23:33   ` Dave Chinner
2020-07-09  0:55     ` Gao Xiang
2020-07-09  2:32       ` Dave Chinner
2020-07-09 10:36         ` Gao Xiang
2020-07-09 10:47           ` Gao Xiang
2020-07-09 22:36           ` Dave Chinner
2020-07-24  6:12 ` [RFC PATCH v2 0/3] xfs: more unlinked inode list optimization v2 Gao Xiang
2020-07-24  6:12   ` [RFC PATCH v2 1/3] xfs: arrange all unlinked inodes into one list Gao Xiang
2020-07-24  6:12   ` [RFC PATCH v2 2/3] xfs: introduce perag iunlink lock Gao Xiang
2020-07-24  6:12   ` [RFC PATCH v2 3/3] xfs: insert unlinked inodes from tail Gao Xiang
2020-08-18 13:30   ` [RFC PATCH v4 0/3] xfs: more unlinked inode list optimization v4 Gao Xiang
2020-08-18 13:30     ` [RFC PATCH v4 1/3] xfs: get rid of unused pagi_unlinked_hash Gao Xiang
2020-08-19  0:54       ` Darrick J. Wong
2020-08-21  1:09         ` Dave Chinner
2020-08-18 13:30     ` Gao Xiang [this message]
2020-08-19  1:06       ` [RFC PATCH v4 2/3] xfs: introduce perag iunlink lock Darrick J. Wong
2020-08-19  1:23         ` Gao Xiang
2020-08-18 13:30     ` [RFC PATCH v4 3/3] xfs: insert unlinked inodes from tail Gao Xiang
2020-08-19  0:53     ` [RFC PATCH v4 0/3] xfs: more unlinked inode list optimization v4 Darrick J. Wong
2020-08-19  1:14       ` Gao Xiang
2020-08-20  2:46     ` Darrick J. Wong
2020-08-20  4:01       ` Gao Xiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200818133015.25398-3-hsiangkao@redhat.com \
    --to=hsiangkao@redhat.com \
    --cc=bfoster@redhat.com \
    --cc=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-XFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-xfs/0 linux-xfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-xfs linux-xfs/ https://lore.kernel.org/linux-xfs \
		linux-xfs@vger.kernel.org
	public-inbox-index linux-xfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-xfs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git