From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 09/10] xfs: parallelize inode inactivation
Date: Tue, 31 Dec 2019 17:09:36 -0800 [thread overview]
Message-ID: <157784097668.1362752.16785191645786207862.stgit@magnolia> (raw)
In-Reply-To: <157784092020.1362752.15046503361741521784.stgit@magnolia>
From: Darrick J. Wong <darrick.wong@oracle.com>
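Split the inode inactivation work into per-AG work items so that we can
take advantage of parallelization.  Each AG gets its own delayed work
item in place of the single per-mount one, and the inactivation workqueue
becomes unbound, with its max_active configured alongside the blockgc
workqueue, so that several AGs can be inactivated concurrently.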
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/scrub/common.c | 2 +
fs/xfs/xfs_icache.c | 90 ++++++++++++++++++++++++++++++++++++++++++-------
fs/xfs/xfs_icache.h | 2 +
fs/xfs/xfs_mount.c | 3 ++
fs/xfs/xfs_mount.h | 4 ++
fs/xfs/xfs_super.c | 5 ++-
6 files changed, 90 insertions(+), 16 deletions(-)
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 52fc05ee7ef8..402d42a277f4 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -910,6 +910,7 @@ xchk_stop_reaping(
{
sc->flags |= XCHK_REAPING_DISABLED;
xfs_blockgc_stop(sc->mp);
+ xfs_inactive_cancel_work(sc->mp);
}
/* Restart background reaping of resources. */
@@ -917,6 +918,7 @@ void
xchk_start_reaping(
struct xfs_scrub *sc)
{
+ xfs_inactive_schedule_now(sc->mp);
xfs_blockgc_start(sc->mp);
sc->flags &= ~XCHK_REAPING_DISABLED;
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 13b318dc2e89..5240e9e517d7 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -2130,12 +2130,12 @@ xfs_inode_clear_cowblocks_tag(
/* Queue a new inode inactivation pass if there are reclaimable inodes. */
static void
xfs_inactive_work_queue(
- struct xfs_mount *mp)
+ struct xfs_perag *pag)
{
rcu_read_lock();
- if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_INACTIVE_TAG))
- queue_delayed_work(mp->m_inactive_workqueue,
- &mp->m_inactive_work,
+ if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_INACTIVE_TAG))
+ queue_delayed_work(pag->pag_mount->m_inactive_workqueue,
+ &pag->pag_inactive_work,
msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
rcu_read_unlock();
}
@@ -2158,7 +2158,7 @@ xfs_perag_set_inactive_tag(
spin_unlock(&mp->m_perag_lock);
/* schedule periodic background inode inactivation */
- xfs_inactive_work_queue(mp);
+ xfs_inactive_work_queue(pag);
trace_xfs_perag_set_inactive(mp, pag->pag_agno, -1, _RET_IP_);
}
@@ -2275,6 +2275,19 @@ static const struct xfs_ici_walk_ops xfs_inactive_iwalk_ops = {
.iwalk = xfs_inactive_inode,
};
+/*
+ * Inactivate the inodes in an AG. Even if the filesystem is corrupted, we
+ * still need to clear the INACTIVE iflag so that we can move on to reclaiming
+ * the inode.
+ */
+static int
+xfs_inactive_inodes_pag(
+ struct xfs_perag *pag)
+{
+ return xfs_ici_walk_ag(pag, &xfs_inactive_iwalk_ops, 0, NULL,
+ XFS_ICI_INACTIVE_TAG);
+}
+
/*
* Walk the AGs and reclaim the inodes in them. Even if the filesystem is
* corrupted, we still need to clear the INACTIVE iflag so that we can move
@@ -2294,8 +2307,9 @@ void
xfs_inactive_worker(
struct work_struct *work)
{
- struct xfs_mount *mp = container_of(to_delayed_work(work),
- struct xfs_mount, m_inactive_work);
+ struct xfs_perag *pag = container_of(to_delayed_work(work),
+ struct xfs_perag, pag_inactive_work);
+ struct xfs_mount *mp = pag->pag_mount;
int error;
/*
@@ -2310,12 +2324,31 @@ xfs_inactive_worker(
if (!sb_start_write_trylock(mp->m_super))
return;
- error = xfs_inactive_inodes(mp, NULL);
+ error = xfs_inactive_inodes_pag(pag);
if (error && error != -EAGAIN)
xfs_err(mp, "inode inactivation failed, error %d", error);
sb_end_write(mp->m_super);
- xfs_inactive_work_queue(mp);
+ xfs_inactive_work_queue(pag);
+}
+
+/* Wait for all background inactivation work to finish. */
+static void
+xfs_inactive_flush(
+ struct xfs_mount *mp)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
+ for_each_perag_tag(mp, agno, pag, XFS_ICI_INACTIVE_TAG) {
+ bool flush;
+
+ spin_lock(&pag->pag_ici_lock);
+ flush = pag->pag_ici_inactive > 0;
+ spin_unlock(&pag->pag_ici_lock);
+ if (flush)
+ flush_delayed_work(&pag->pag_inactive_work);
+ }
}
/* Flush all inode inactivation work that might be queued. */
@@ -2323,8 +2356,8 @@ void
xfs_inactive_force(
struct xfs_mount *mp)
{
- queue_delayed_work(mp->m_inactive_workqueue, &mp->m_inactive_work, 0);
- flush_delayed_work(&mp->m_inactive_work);
+ xfs_inactive_schedule_now(mp);
+ xfs_inactive_flush(mp);
}
/*
@@ -2336,9 +2369,40 @@ void
xfs_inactive_shutdown(
struct xfs_mount *mp)
{
- cancel_delayed_work_sync(&mp->m_inactive_work);
- flush_workqueue(mp->m_inactive_workqueue);
+ xfs_inactive_cancel_work(mp);
xfs_inactive_inodes(mp, NULL);
cancel_delayed_work_sync(&mp->m_reclaim_work);
xfs_reclaim_inodes(mp, SYNC_WAIT);
}
+
+/* Cancel all queued inactivation work. */
+void
+xfs_inactive_cancel_work(
+ struct xfs_mount *mp)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
+ for_each_perag_tag(mp, agno, pag, XFS_ICI_INACTIVE_TAG)
+ cancel_delayed_work_sync(&pag->pag_inactive_work);
+ flush_workqueue(mp->m_inactive_workqueue);
+}
+
+/* Cancel all pending deferred inactivation work and reschedule it now. */
+void
+xfs_inactive_schedule_now(
+ struct xfs_mount *mp)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
+ for_each_perag_tag(mp, agno, pag, XFS_ICI_INACTIVE_TAG) {
+ spin_lock(&pag->pag_ici_lock);
+ if (pag->pag_ici_inactive) {
+ cancel_delayed_work(&pag->pag_inactive_work);
+ queue_delayed_work(mp->m_inactive_workqueue,
+ &pag->pag_inactive_work, 0);
+ }
+ spin_unlock(&pag->pag_ici_lock);
+ }
+}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index d6e79e7b5d94..a82b473b88a2 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -86,5 +86,7 @@ void xfs_inactive_worker(struct work_struct *work);
int xfs_inactive_inodes(struct xfs_mount *mp, struct xfs_eofblocks *eofb);
void xfs_inactive_force(struct xfs_mount *mp);
void xfs_inactive_shutdown(struct xfs_mount *mp);
+void xfs_inactive_cancel_work(struct xfs_mount *mp);
+void xfs_inactive_schedule_now(struct xfs_mount *mp);
#endif
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 27729a8c8c12..b9b37eff4063 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -127,6 +127,7 @@ __xfs_free_perag(
struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
+ ASSERT(!delayed_work_pending(&pag->pag_inactive_work));
ASSERT(atomic_read(&pag->pag_ref) == 0);
kmem_free(pag);
}
@@ -148,6 +149,7 @@ xfs_free_perag(
ASSERT(pag);
ASSERT(atomic_read(&pag->pag_ref) == 0);
cancel_delayed_work_sync(&pag->pag_blockgc_work);
+ cancel_delayed_work_sync(&pag->pag_inactive_work);
xfs_iunlink_destroy(pag);
xfs_buf_hash_destroy(pag);
mutex_destroy(&pag->pag_ici_reclaim_lock);
@@ -204,6 +206,7 @@ xfs_initialize_perag(
spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
+ INIT_DELAYED_WORK(&pag->pag_inactive_work, xfs_inactive_worker);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
if (xfs_buf_hash_init(pag))
goto out_free_pag;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 51f88b56bbbe..87a62b0543ec 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -162,7 +162,6 @@ typedef struct xfs_mount {
atomic_t m_active_trans; /* number trans frozen */
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct delayed_work m_reclaim_work; /* background inode reclaim */
- struct delayed_work m_inactive_work; /* background inode inactive */
bool m_update_sb; /* sb needs update in mount */
int64_t m_low_space[XFS_LOWSP_MAX];
/* low free space thresholds */
@@ -366,6 +365,9 @@ typedef struct xfs_perag {
/* background prealloc block trimming */
struct delayed_work pag_blockgc_work;
+ /* background inode inactivation */
+ struct delayed_work pag_inactive_work;
+
/* reference count */
uint8_t pagf_refcount_level;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 14c5d002c358..fced499ecdc9 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -521,7 +521,8 @@ xfs_init_mount_workqueues(
goto out_destroy_eofb;
mp->m_inactive_workqueue = alloc_workqueue("xfs-inactive/%s",
- WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, mp->m_super->s_id);
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_FREEZABLE, 0,
+ mp->m_super->s_id);
if (!mp->m_inactive_workqueue)
goto out_destroy_sync;
@@ -1449,6 +1450,7 @@ xfs_configure_background_workqueues(
max_active = min_t(unsigned int, max_active, WQ_UNBOUND_MAX_ACTIVE);
workqueue_set_max_active(mp->m_blockgc_workqueue, max_active);
+ workqueue_set_max_active(mp->m_inactive_workqueue, max_active);
}
static int
@@ -1856,7 +1858,6 @@ static int xfs_init_fs_context(
atomic_set(&mp->m_active_trans, 0);
INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
- INIT_DELAYED_WORK(&mp->m_inactive_work, xfs_inactive_worker);
mp->m_kobj.kobject.kset = xfs_kset;
/*
* We don't create the finobt per-ag space reservation until after log
next prev parent reply other threads:[~2020-01-01 1:09 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-01 1:08 [PATCH v2 00/10] xfs: deferred inode inactivation Darrick J. Wong
2020-01-01 1:08 ` [PATCH 01/10] xfs: decide if inode needs inactivation Darrick J. Wong
2020-01-01 1:08 ` [PATCH 02/10] xfs: track unlinked inactive inode fs summary counters Darrick J. Wong
2020-01-01 1:08 ` [PATCH 03/10] xfs: track unlinked inactive inode quota counters Darrick J. Wong
2020-01-01 1:09 ` [PATCH 04/10] xfs: pass per-ag structure to the xfs_ici_walk execute function Darrick J. Wong
2020-01-01 1:09 ` [PATCH 05/10] xfs: pass around xfs_inode_ag_walk iget/irele helper functions Darrick J. Wong
2020-01-01 1:09 ` [PATCH 06/10] xfs: deferred inode inactivation Darrick J. Wong
2020-01-01 1:09 ` [PATCH 07/10] xfs: force inode inactivation and retry fs writes when there isn't space Darrick J. Wong
2020-01-01 1:09 ` [PATCH 08/10] xfs: force inactivation before fallocate when space is low Darrick J. Wong
2020-01-01 1:09 ` Darrick J. Wong [this message]
2020-01-01 1:09 ` [PATCH 10/10] xfs: create a polled function to force inode inactivation Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=157784097668.1362752.16785191645786207862.stgit@magnolia \
--to=darrick.wong@oracle.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).