From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 09/10] xfs: parallelize inode inactivation
Date: Tue, 31 Dec 2019 17:09:36 -0800 [thread overview]
Message-ID: <157784097668.1362752.16785191645786207862.stgit@magnolia> (raw)
In-Reply-To: <157784092020.1362752.15046503361741521784.stgit@magnolia>
From: Darrick J. Wong <darrick.wong@oracle.com>
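Split the inode inactivation work into per-AG work items so that we can
take advantage of parallelization. Each AG now has its own delayed work
item, and the inactivation workqueue is made unbound so that work for
different AGs can run concurrently.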
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/scrub/common.c | 2 +
fs/xfs/xfs_icache.c | 90 ++++++++++++++++++++++++++++++++++++++++++-------
fs/xfs/xfs_icache.h | 2 +
fs/xfs/xfs_mount.c | 3 ++
fs/xfs/xfs_mount.h | 4 ++
fs/xfs/xfs_super.c | 5 ++-
6 files changed, 90 insertions(+), 16 deletions(-)
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 52fc05ee7ef8..402d42a277f4 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -910,6 +910,7 @@ xchk_stop_reaping(
{
sc->flags |= XCHK_REAPING_DISABLED;
xfs_blockgc_stop(sc->mp);
+ xfs_inactive_cancel_work(sc->mp);
}
/* Restart background reaping of resources. */
@@ -917,6 +918,7 @@ void
xchk_start_reaping(
struct xfs_scrub *sc)
{
+ xfs_inactive_schedule_now(sc->mp);
xfs_blockgc_start(sc->mp);
sc->flags &= ~XCHK_REAPING_DISABLED;
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 13b318dc2e89..5240e9e517d7 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -2130,12 +2130,12 @@ xfs_inode_clear_cowblocks_tag(
/* Queue a new inode inactivation pass if there are reclaimable inodes. */
static void
xfs_inactive_work_queue(
- struct xfs_mount *mp)
+ struct xfs_perag *pag)
{
rcu_read_lock();
- if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_INACTIVE_TAG))
- queue_delayed_work(mp->m_inactive_workqueue,
- &mp->m_inactive_work,
+ if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_INACTIVE_TAG))
+ queue_delayed_work(pag->pag_mount->m_inactive_workqueue,
+ &pag->pag_inactive_work,
msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
rcu_read_unlock();
}
@@ -2158,7 +2158,7 @@ xfs_perag_set_inactive_tag(
spin_unlock(&mp->m_perag_lock);
/* schedule periodic background inode inactivation */
- xfs_inactive_work_queue(mp);
+ xfs_inactive_work_queue(pag);
trace_xfs_perag_set_inactive(mp, pag->pag_agno, -1, _RET_IP_);
}
@@ -2275,6 +2275,19 @@ static const struct xfs_ici_walk_ops xfs_inactive_iwalk_ops = {
.iwalk = xfs_inactive_inode,
};
+/*
+ * Inactivate the inodes in an AG. Even if the filesystem is corrupted, we
+ * still need to clear the INACTIVE iflag so that we can move on to reclaiming
+ * the inode.
+ */
+static int
+xfs_inactive_inodes_pag(
+ struct xfs_perag *pag)
+{
+ return xfs_ici_walk_ag(pag, &xfs_inactive_iwalk_ops, 0, NULL,
+ XFS_ICI_INACTIVE_TAG);
+}
+
/*
* Walk the AGs and reclaim the inodes in them. Even if the filesystem is
* corrupted, we still need to clear the INACTIVE iflag so that we can move
@@ -2294,8 +2307,9 @@ void
xfs_inactive_worker(
struct work_struct *work)
{
- struct xfs_mount *mp = container_of(to_delayed_work(work),
- struct xfs_mount, m_inactive_work);
+ struct xfs_perag *pag = container_of(to_delayed_work(work),
+ struct xfs_perag, pag_inactive_work);
+ struct xfs_mount *mp = pag->pag_mount;
int error;
/*
@@ -2310,12 +2324,31 @@ xfs_inactive_worker(
if (!sb_start_write_trylock(mp->m_super))
return;
- error = xfs_inactive_inodes(mp, NULL);
+ error = xfs_inactive_inodes_pag(pag);
if (error && error != -EAGAIN)
xfs_err(mp, "inode inactivation failed, error %d", error);
sb_end_write(mp->m_super);
- xfs_inactive_work_queue(mp);
+ xfs_inactive_work_queue(pag);
+}
+
+/* Wait for all background inactivation work to finish. */
+static void
+xfs_inactive_flush(
+ struct xfs_mount *mp)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
+ for_each_perag_tag(mp, agno, pag, XFS_ICI_INACTIVE_TAG) {
+ bool flush;
+
+ spin_lock(&pag->pag_ici_lock);
+ flush = pag->pag_ici_inactive > 0;
+ spin_unlock(&pag->pag_ici_lock);
+ if (flush)
+ flush_delayed_work(&pag->pag_inactive_work);
+ }
}
/* Flush all inode inactivation work that might be queued. */
@@ -2323,8 +2356,8 @@ void
xfs_inactive_force(
struct xfs_mount *mp)
{
- queue_delayed_work(mp->m_inactive_workqueue, &mp->m_inactive_work, 0);
- flush_delayed_work(&mp->m_inactive_work);
+ xfs_inactive_schedule_now(mp);
+ xfs_inactive_flush(mp);
}
/*
@@ -2336,9 +2369,40 @@ void
xfs_inactive_shutdown(
struct xfs_mount *mp)
{
- cancel_delayed_work_sync(&mp->m_inactive_work);
- flush_workqueue(mp->m_inactive_workqueue);
+ xfs_inactive_cancel_work(mp);
xfs_inactive_inodes(mp, NULL);
cancel_delayed_work_sync(&mp->m_reclaim_work);
xfs_reclaim_inodes(mp, SYNC_WAIT);
}
+
+/* Cancel all queued inactivation work and wait for running work to finish. */
+void
+xfs_inactive_cancel_work(
+ struct xfs_mount *mp)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
+ for_each_perag_tag(mp, agno, pag, XFS_ICI_INACTIVE_TAG)
+ cancel_delayed_work_sync(&pag->pag_inactive_work);
+ flush_workqueue(mp->m_inactive_workqueue);
+}
+
+/* Cancel all pending deferred inactivation work and reschedule it now. */
+void
+xfs_inactive_schedule_now(
+ struct xfs_mount *mp)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
+ for_each_perag_tag(mp, agno, pag, XFS_ICI_INACTIVE_TAG) {
+ spin_lock(&pag->pag_ici_lock);
+ if (pag->pag_ici_inactive) {
+ cancel_delayed_work(&pag->pag_inactive_work);
+ queue_delayed_work(mp->m_inactive_workqueue,
+ &pag->pag_inactive_work, 0);
+ }
+ spin_unlock(&pag->pag_ici_lock);
+ }
+}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index d6e79e7b5d94..a82b473b88a2 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -86,5 +86,7 @@ void xfs_inactive_worker(struct work_struct *work);
int xfs_inactive_inodes(struct xfs_mount *mp, struct xfs_eofblocks *eofb);
void xfs_inactive_force(struct xfs_mount *mp);
void xfs_inactive_shutdown(struct xfs_mount *mp);
+void xfs_inactive_cancel_work(struct xfs_mount *mp);
+void xfs_inactive_schedule_now(struct xfs_mount *mp);
#endif
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 27729a8c8c12..b9b37eff4063 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -127,6 +127,7 @@ __xfs_free_perag(
struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
+ ASSERT(!delayed_work_pending(&pag->pag_inactive_work));
ASSERT(atomic_read(&pag->pag_ref) == 0);
kmem_free(pag);
}
@@ -148,6 +149,7 @@ xfs_free_perag(
ASSERT(pag);
ASSERT(atomic_read(&pag->pag_ref) == 0);
cancel_delayed_work_sync(&pag->pag_blockgc_work);
+ cancel_delayed_work_sync(&pag->pag_inactive_work);
xfs_iunlink_destroy(pag);
xfs_buf_hash_destroy(pag);
mutex_destroy(&pag->pag_ici_reclaim_lock);
@@ -204,6 +206,7 @@ xfs_initialize_perag(
spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
+ INIT_DELAYED_WORK(&pag->pag_inactive_work, xfs_inactive_worker);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
if (xfs_buf_hash_init(pag))
goto out_free_pag;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 51f88b56bbbe..87a62b0543ec 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -162,7 +162,6 @@ typedef struct xfs_mount {
atomic_t m_active_trans; /* number trans frozen */
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct delayed_work m_reclaim_work; /* background inode reclaim */
- struct delayed_work m_inactive_work; /* background inode inactive */
bool m_update_sb; /* sb needs update in mount */
int64_t m_low_space[XFS_LOWSP_MAX];
/* low free space thresholds */
@@ -366,6 +365,9 @@ typedef struct xfs_perag {
/* background prealloc block trimming */
struct delayed_work pag_blockgc_work;
+ /* background inode inactivation */
+ struct delayed_work pag_inactive_work;
+
/* reference count */
uint8_t pagf_refcount_level;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 14c5d002c358..fced499ecdc9 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -521,7 +521,8 @@ xfs_init_mount_workqueues(
goto out_destroy_eofb;
mp->m_inactive_workqueue = alloc_workqueue("xfs-inactive/%s",
- WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, mp->m_super->s_id);
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_FREEZABLE, 0,
+ mp->m_super->s_id);
if (!mp->m_inactive_workqueue)
goto out_destroy_sync;
@@ -1449,6 +1450,7 @@ xfs_configure_background_workqueues(
max_active = min_t(unsigned int, max_active, WQ_UNBOUND_MAX_ACTIVE);
workqueue_set_max_active(mp->m_blockgc_workqueue, max_active);
+ workqueue_set_max_active(mp->m_inactive_workqueue, max_active);
}
static int
@@ -1856,7 +1858,6 @@ static int xfs_init_fs_context(
atomic_set(&mp->m_active_trans, 0);
INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
- INIT_DELAYED_WORK(&mp->m_inactive_work, xfs_inactive_worker);
mp->m_kobj.kobject.kset = xfs_kset;
/*
* We don't create the finobt per-ag space reservation until after log
next prev parent reply other threads:[~2020-01-01 1:09 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-01 1:08 [PATCH v2 00/10] xfs: deferred inode inactivation Darrick J. Wong
2020-01-01 1:08 ` [PATCH 01/10] xfs: decide if inode needs inactivation Darrick J. Wong
2020-01-01 1:08 ` [PATCH 02/10] xfs: track unlinked inactive inode fs summary counters Darrick J. Wong
2020-01-01 1:08 ` [PATCH 03/10] xfs: track unlinked inactive inode quota counters Darrick J. Wong
2020-01-01 1:09 ` [PATCH 04/10] xfs: pass per-ag structure to the xfs_ici_walk execute function Darrick J. Wong
2020-01-01 1:09 ` [PATCH 05/10] xfs: pass around xfs_inode_ag_walk iget/irele helper functions Darrick J. Wong
2020-01-01 1:09 ` [PATCH 06/10] xfs: deferred inode inactivation Darrick J. Wong
2020-01-01 1:09 ` [PATCH 07/10] xfs: force inode inactivation and retry fs writes when there isn't space Darrick J. Wong
2020-01-01 1:09 ` [PATCH 08/10] xfs: force inactivation before fallocate when space is low Darrick J. Wong
2020-01-01 1:09 ` Darrick J. Wong [this message]
2020-01-01 1:09 ` [PATCH 10/10] xfs: create a polled function to force inode inactivation Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=157784097668.1362752.16785191645786207862.stgit@magnolia \
--to=darrick.wong@oracle.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.