From: Gao Xiang <hsiangkao@redhat.com>
To: linux-xfs@vger.kernel.org
Cc: Dave Chinner <david@fromorbit.com>,
"Darrick J. Wong" <darrick.wong@oracle.com>,
Brian Foster <bfoster@redhat.com>,
Gao Xiang <hsiangkao@redhat.com>
Subject: [RFC PATCH v2 3/3] xfs: insert unlinked inodes from tail
Date: Fri, 24 Jul 2020 14:12:59 +0800 [thread overview]
Message-ID: <20200724061259.5519-4-hsiangkao@redhat.com> (raw)
In-Reply-To: <20200724061259.5519-1-hsiangkao@redhat.com>
Currently, AGI buffer is always touched since xfs_iunlink()
adds unlinked inodes from head unconditionally, but since we
now have the only one unlinked list and if we insert unlinked
inodes from tail instead and there're more than 1 inodes, AGI
buffer modification could be avoided.
In order to do that, let's keep track of the tail of unlinked
inode into the perag all the time in order for xfs_iunlink()
to use.
With this change, it shows that only 938 of 10000 operations
modifies the head of unlinked list with the following workload:
seq 1 10000 | xargs touch
find . | xargs -n3 -P100 rm
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
fs/xfs/xfs_inode.c | 120 ++++++++++++++++++++++++---------------
fs/xfs/xfs_log_recover.c | 5 ++
fs/xfs/xfs_mount.c | 1 +
fs/xfs/xfs_mount.h | 3 +
4 files changed, 84 insertions(+), 45 deletions(-)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d78aaa8ce252..3cfd84b76955 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1986,6 +1986,38 @@ xfs_iunlink_update_bucket(
return 0;
}
+/*
+ * Lock the perag and take AGI lock if agi_unlinked is touched as well
+ * for xfs_iunlink_insert_inode(). As for the details of locking order,
+ * refer to the comments of xfs_iunlink_remove_lock().
+ */
+static struct xfs_perag *
+xfs_iunlink_insert_lock(
+ xfs_agino_t agno,
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ struct xfs_buf **agibpp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_perag *pag;
+ int error;
+
+ pag = xfs_perag_get(mp, agno);
+ mutex_lock(&pag->pag_unlinked_mutex);
+
+ if (!pag->pag_unlinked_tail) {
+ mutex_unlock(&pag->pag_unlinked_mutex);
+
+ error = xfs_read_agi(mp, tp, agno, agibpp);
+ if (error) {
+ xfs_perag_put(pag);
+ return ERR_PTR(error);
+ }
+ mutex_lock(&pag->pag_unlinked_mutex);
+ }
+ return pag;
+}
+
/*
* Always insert at the head, so we only have to do a next inode lookup to
* update it's prev pointer. The AGI bucket will point at the one we are
@@ -1993,50 +2025,47 @@ xfs_iunlink_update_bucket(
*/
static int
xfs_iunlink_insert_inode(
+ struct xfs_perag *pag,
struct xfs_trans *tp,
struct xfs_buf *agibp,
struct xfs_inode *ip)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_agi *agi = agibp->b_addr;
- xfs_agino_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+ struct xfs_inode *pip = pag->pag_unlinked_tail;
xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
xfs_agino_t next_agino;
- /*
- * Get the index into the agi hash table for the list this inode will
- * go on. Make sure the pointer isn't garbage and that this inode
- * isn't already on the list.
- */
- next_agino = be32_to_cpu(agi->agi_unlinked[0]);
- if (next_agino == agino ||
- !xfs_verify_agino_or_null(mp, agno, next_agino)) {
- xfs_buf_mark_corrupt(agibp);
- return -EFSCORRUPTED;
- }
-
- ip->i_prev_unlinked = NULLAGINO;
- ip->i_next_unlinked = next_agino;
- if (ip->i_next_unlinked != NULLAGINO) {
- struct xfs_inode *nip;
-
- nip = xfs_iunlink_lookup_next(agibp->b_pag, ip);
- if (IS_ERR_OR_NULL(nip))
- return -EFSCORRUPTED;
+ if (!pip) {
+ xfs_agino_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+ struct xfs_agi *agi = agibp->b_addr;
+ int error;
- if (nip->i_prev_unlinked != NULLAGINO) {
- xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
- NULL, 0, __this_address);
+ ip->i_prev_unlinked = NULLAGINO;
+ /*
+ * Get the index into the agi hash table for the list this
+ * inode will go on. Make sure the pointer isn't garbage
+ * and that this inode isn't already on the list.
+ */
+ next_agino = be32_to_cpu(agi->agi_unlinked[0]);
+ if (next_agino == agino ||
+ !xfs_verify_agino_or_null(mp, agno, next_agino)) {
+ xfs_buf_mark_corrupt(agibp);
return -EFSCORRUPTED;
}
- nip->i_prev_unlinked = agino;
- /* update the on disk inode now */
- xfs_iunlink_log(tp, ip);
+ /* Point the head of the list to point to this inode. */
+ error = xfs_iunlink_update_bucket(tp, agno, agibp, 0, agino);
+ if (error)
+ return error;
+ } else {
+ ip->i_prev_unlinked = XFS_INO_TO_AGINO(mp, pip->i_ino);
+ ASSERT(pip->i_next_unlinked == NULLAGINO);
+ pip->i_next_unlinked = agino;
+ xfs_iunlink_log(tp, pip);
}
-
- /* Point the head of the list to point to this inode. */
- return xfs_iunlink_update_bucket(tp, agno, agibp, 0, agino);
+ ip->i_next_unlinked = NULLAGINO;
+ pag->pag_unlinked_tail = ip;
+ return 0;
}
/*
@@ -2095,6 +2124,7 @@ xfs_iunlink_remove_inode(
xfs_agino_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
xfs_agino_t next_agino = ip->i_next_unlinked;
+ struct xfs_inode *pip = NULL;
int error;
if (ip->i_prev_unlinked == NULLAGINO) {
@@ -2122,8 +2152,6 @@ xfs_iunlink_remove_inode(
return -EFSCORRUPTED;
} else {
/* lookup previous inode and update to point at next */
- struct xfs_inode *pip;
-
pip = xfs_iunlink_lookup_prev(pag, ip);
if (IS_ERR_OR_NULL(pip))
return -EFSCORRUPTED;
@@ -2139,8 +2167,12 @@ xfs_iunlink_remove_inode(
xfs_iunlink_log(tp, pip);
}
- /* lookup the next inode and update to point at prev */
- if (ip->i_next_unlinked != NULLAGINO) {
+ if (next_agino == NULLAGINO) {
+ /* only care about the tail of bucket 0 for xfs_iunlink() */
+ if (pag->pag_unlinked_tail == ip)
+ pag->pag_unlinked_tail = pip;
+ } else {
+ /* lookup the next inode and update to point at prev */
struct xfs_inode *nip;
nip = xfs_iunlink_lookup_next(pag, ip);
@@ -2188,23 +2220,21 @@ xfs_iunlink(
struct xfs_trans *tp,
struct xfs_inode *ip)
{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_buf *agibp;
- xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+ struct xfs_buf *agibp = NULL;
+ struct xfs_perag *pag;
int error;
ASSERT(VFS_I(ip)->i_nlink == 0);
ASSERT(VFS_I(ip)->i_mode != 0);
trace_xfs_iunlink(ip);
- /* Get the agi buffer first. It ensures lock ordering on the list. */
- error = xfs_read_agi(mp, tp, agno, &agibp);
- if (error)
- return error;
+ pag = xfs_iunlink_insert_lock(XFS_INO_TO_AGNO(tp->t_mountp, ip->i_ino),
+ tp, ip, &agibp);
+ if (IS_ERR(pag))
+ return PTR_ERR(pag);
- mutex_lock(&agibp->b_pag->pag_unlinked_mutex);
- error = xfs_iunlink_insert_inode(tp, agibp, ip);
- mutex_unlock(&agibp->b_pag->pag_unlinked_mutex);
+ error = xfs_iunlink_insert_inode(pag, tp, agibp, ip);
+ xfs_iunlink_unlock(pag);
return error;
}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index d47eea31c165..30198aea7335 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2801,6 +2801,11 @@ xlog_recover_iunlink_ag(
prev_ip->i_next_unlinked = NULLAGINO;
break;
}
+
+ /* XXX: take pag_unlinked_mutex across the loop? */
+ if (!bucket)
+ agibp->b_pag->pag_unlinked_tail = ip;
+
if (prev_ip) {
ip->i_prev_unlinked = prev_agino;
xfs_irele(prev_ip);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f94e14059e61..f1cd3e9c4da5 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -224,6 +224,7 @@ xfs_initialize_perag(
first_initialised = index;
spin_lock_init(&pag->pag_state_lock);
mutex_init(&pag->pag_unlinked_mutex);
+ pag->pag_unlinked_tail = NULL;
}
index = xfs_set_inode_alloc(mp, agcount);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9a1d0f239fa4..934e7c373042 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -375,6 +375,9 @@ typedef struct xfs_perag {
/* lock to protect unlinked inode list */
struct mutex pag_unlinked_mutex;
+ /* point to the inode tail of AGI unlinked bucket 0 */
+ struct xfs_inode *pag_unlinked_tail;
+
/*
* Unlinked inode information. This incore information reflects
* data stored in the AGI, so callers must hold the AGI buffer lock
--
2.18.1
next prev parent reply other threads:[~2020-07-24 6:14 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-07 13:57 [RFC PATCH 0/2] xfs: more unlinked inode list optimization v1 Gao Xiang
2020-07-07 13:57 ` [RFC PATCH 1/2] xfs: arrange all unlinked inodes into one list Gao Xiang
2020-07-08 22:33 ` Dave Chinner
2020-07-09 0:17 ` Gao Xiang
2020-07-07 13:57 ` [RFC PATCH 2/2] xfs: don't access AGI on unlinked inodes if it can Gao Xiang
2020-07-08 17:03 ` Darrick J. Wong
2020-07-08 23:40 ` Gao Xiang
2020-07-08 23:33 ` Dave Chinner
2020-07-09 0:55 ` Gao Xiang
2020-07-09 2:32 ` Dave Chinner
2020-07-09 10:36 ` Gao Xiang
2020-07-09 10:47 ` Gao Xiang
2020-07-09 22:36 ` Dave Chinner
2020-07-24 6:12 ` [RFC PATCH v2 0/3] xfs: more unlinked inode list optimization v2 Gao Xiang
2020-07-24 6:12 ` [RFC PATCH v2 1/3] xfs: arrange all unlinked inodes into one list Gao Xiang
2020-07-24 6:12 ` [RFC PATCH v2 2/3] xfs: introduce perag iunlink lock Gao Xiang
2020-07-24 6:12 ` Gao Xiang [this message]
2020-08-18 13:30 ` [RFC PATCH v4 0/3] xfs: more unlinked inode list optimization v4 Gao Xiang
2020-08-18 13:30 ` [RFC PATCH v4 1/3] xfs: get rid of unused pagi_unlinked_hash Gao Xiang
2020-08-19 0:54 ` Darrick J. Wong
2020-08-21 1:09 ` Dave Chinner
2020-08-18 13:30 ` [RFC PATCH v4 2/3] xfs: introduce perag iunlink lock Gao Xiang
2020-08-19 1:06 ` Darrick J. Wong
2020-08-19 1:23 ` Gao Xiang
2020-08-18 13:30 ` [RFC PATCH v4 3/3] xfs: insert unlinked inodes from tail Gao Xiang
2020-08-19 0:53 ` [RFC PATCH v4 0/3] xfs: more unlinked inode list optimization v4 Darrick J. Wong
2020-08-19 1:14 ` Gao Xiang
2020-08-20 2:46 ` Darrick J. Wong
2020-08-20 4:01 ` Gao Xiang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200724061259.5519-4-hsiangkao@redhat.com \
--to=hsiangkao@redhat.com \
--cc=bfoster@redhat.com \
--cc=darrick.wong@oracle.com \
--cc=david@fromorbit.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).