All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 09/10] xfs: parallelize inode inactivation
Date: Tue, 31 Dec 2019 17:09:36 -0800	[thread overview]
Message-ID: <157784097668.1362752.16785191645786207862.stgit@magnolia> (raw)
In-Reply-To: <157784092020.1362752.15046503361741521784.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

Split the inode inactivation work into per-AG work items so that we can
take advantage of parallelization.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/scrub/common.c |    2 +
 fs/xfs/xfs_icache.c   |   90 ++++++++++++++++++++++++++++++++++++++++++-------
 fs/xfs/xfs_icache.h   |    2 +
 fs/xfs/xfs_mount.c    |    3 ++
 fs/xfs/xfs_mount.h    |    4 ++
 fs/xfs/xfs_super.c    |    5 ++-
 6 files changed, 90 insertions(+), 16 deletions(-)


diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 52fc05ee7ef8..402d42a277f4 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -910,6 +910,7 @@ xchk_stop_reaping(
 {
 	sc->flags |= XCHK_REAPING_DISABLED;
 	xfs_blockgc_stop(sc->mp);
+	xfs_inactive_cancel_work(sc->mp);
 }
 
 /* Restart background reaping of resources. */
@@ -917,6 +918,7 @@ void
 xchk_start_reaping(
 	struct xfs_scrub	*sc)
 {
+	xfs_inactive_schedule_now(sc->mp);
 	xfs_blockgc_start(sc->mp);
 	sc->flags &= ~XCHK_REAPING_DISABLED;
 }
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 13b318dc2e89..5240e9e517d7 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -2130,12 +2130,12 @@ xfs_inode_clear_cowblocks_tag(
 /* Queue a new inode inactivation pass if there are reclaimable inodes. */
 static void
 xfs_inactive_work_queue(
-	struct xfs_mount        *mp)
+	struct xfs_perag	*pag)
 {
 	rcu_read_lock();
-	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_INACTIVE_TAG))
-		queue_delayed_work(mp->m_inactive_workqueue,
-				&mp->m_inactive_work,
+	if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_INACTIVE_TAG))
+		queue_delayed_work(pag->pag_mount->m_inactive_workqueue,
+				&pag->pag_inactive_work,
 				msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
 	rcu_read_unlock();
 }
@@ -2158,7 +2158,7 @@ xfs_perag_set_inactive_tag(
 	spin_unlock(&mp->m_perag_lock);
 
 	/* schedule periodic background inode inactivation */
-	xfs_inactive_work_queue(mp);
+	xfs_inactive_work_queue(pag);
 
 	trace_xfs_perag_set_inactive(mp, pag->pag_agno, -1, _RET_IP_);
 }
@@ -2275,6 +2275,19 @@ static const struct xfs_ici_walk_ops	xfs_inactive_iwalk_ops = {
 	.iwalk		= xfs_inactive_inode,
 };
 
+/*
+ * Inactivate the inodes in an AG. Even if the filesystem is corrupted, we
+ * still need to clear the INACTIVE iflag so that we can move on to reclaiming
+ * the inode.
+ */
+static int
+xfs_inactive_inodes_pag(
+	struct xfs_perag	*pag)
+{
+	return xfs_ici_walk_ag(pag, &xfs_inactive_iwalk_ops, 0, NULL,
+			XFS_ICI_INACTIVE_TAG);
+}
+
 /*
  * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
  * corrupted, we still need to clear the INACTIVE iflag so that we can move
@@ -2294,8 +2307,9 @@ void
 xfs_inactive_worker(
 	struct work_struct	*work)
 {
-	struct xfs_mount	*mp = container_of(to_delayed_work(work),
-					struct xfs_mount, m_inactive_work);
+	struct xfs_perag	*pag = container_of(to_delayed_work(work),
+					struct xfs_perag, pag_inactive_work);
+	struct xfs_mount	*mp = pag->pag_mount;
 	int			error;
 
 	/*
@@ -2310,12 +2324,31 @@ xfs_inactive_worker(
 	if (!sb_start_write_trylock(mp->m_super))
 		return;
 
-	error = xfs_inactive_inodes(mp, NULL);
+	error = xfs_inactive_inodes_pag(pag);
 	if (error && error != -EAGAIN)
 		xfs_err(mp, "inode inactivation failed, error %d", error);
 
 	sb_end_write(mp->m_super);
-	xfs_inactive_work_queue(mp);
+	xfs_inactive_work_queue(pag);
+}
+
+/* Wait for all background inactivation work to finish. */
+static void
+xfs_inactive_flush(
+	struct xfs_mount	*mp)
+{
+	struct xfs_perag	*pag;
+	xfs_agnumber_t		agno;
+
+	for_each_perag_tag(mp, agno, pag, XFS_ICI_INACTIVE_TAG) {
+		bool		flush;
+
+		spin_lock(&pag->pag_ici_lock);
+		flush = pag->pag_ici_inactive > 0;
+		spin_unlock(&pag->pag_ici_lock);
+		if (flush)
+			flush_delayed_work(&pag->pag_inactive_work);
+	}
 }
 
 /* Flush all inode inactivation work that might be queued. */
@@ -2323,8 +2356,8 @@ void
 xfs_inactive_force(
 	struct xfs_mount	*mp)
 {
-	queue_delayed_work(mp->m_inactive_workqueue, &mp->m_inactive_work, 0);
-	flush_delayed_work(&mp->m_inactive_work);
+	xfs_inactive_schedule_now(mp);
+	xfs_inactive_flush(mp);
 }
 
 /*
@@ -2336,9 +2369,40 @@ void
 xfs_inactive_shutdown(
 	struct xfs_mount	*mp)
 {
-	cancel_delayed_work_sync(&mp->m_inactive_work);
-	flush_workqueue(mp->m_inactive_workqueue);
+	xfs_inactive_cancel_work(mp);
 	xfs_inactive_inodes(mp, NULL);
 	cancel_delayed_work_sync(&mp->m_reclaim_work);
 	xfs_reclaim_inodes(mp, SYNC_WAIT);
 }
+
+/* Cancel all queued inactivation work. */
+void
+xfs_inactive_cancel_work(
+	struct xfs_mount	*mp)
+{
+	struct xfs_perag	*pag;
+	xfs_agnumber_t		agno;
+
+	for_each_perag_tag(mp, agno, pag, XFS_ICI_INACTIVE_TAG)
+		cancel_delayed_work_sync(&pag->pag_inactive_work);
+	flush_workqueue(mp->m_inactive_workqueue);
+}
+
+/* Cancel all pending deferred inactivation work and reschedule it now. */
+void
+xfs_inactive_schedule_now(
+	struct xfs_mount	*mp)
+{
+	struct xfs_perag	*pag;
+	xfs_agnumber_t		agno;
+
+	for_each_perag_tag(mp, agno, pag, XFS_ICI_INACTIVE_TAG) {
+		spin_lock(&pag->pag_ici_lock);
+		if (pag->pag_ici_inactive) {
+			cancel_delayed_work(&pag->pag_inactive_work);
+			queue_delayed_work(mp->m_inactive_workqueue,
+					&pag->pag_inactive_work, 0);
+		}
+		spin_unlock(&pag->pag_ici_lock);
+	}
+}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index d6e79e7b5d94..a82b473b88a2 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -86,5 +86,7 @@ void xfs_inactive_worker(struct work_struct *work);
 int xfs_inactive_inodes(struct xfs_mount *mp, struct xfs_eofblocks *eofb);
 void xfs_inactive_force(struct xfs_mount *mp);
 void xfs_inactive_shutdown(struct xfs_mount *mp);
+void xfs_inactive_cancel_work(struct xfs_mount *mp);
+void xfs_inactive_schedule_now(struct xfs_mount *mp);
 
 #endif
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 27729a8c8c12..b9b37eff4063 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -127,6 +127,7 @@ __xfs_free_perag(
 	struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
 
 	ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
+	ASSERT(!delayed_work_pending(&pag->pag_inactive_work));
 	ASSERT(atomic_read(&pag->pag_ref) == 0);
 	kmem_free(pag);
 }
@@ -148,6 +149,7 @@ xfs_free_perag(
 		ASSERT(pag);
 		ASSERT(atomic_read(&pag->pag_ref) == 0);
 		cancel_delayed_work_sync(&pag->pag_blockgc_work);
+		cancel_delayed_work_sync(&pag->pag_inactive_work);
 		xfs_iunlink_destroy(pag);
 		xfs_buf_hash_destroy(pag);
 		mutex_destroy(&pag->pag_ici_reclaim_lock);
@@ -204,6 +206,7 @@ xfs_initialize_perag(
 		spin_lock_init(&pag->pag_ici_lock);
 		mutex_init(&pag->pag_ici_reclaim_lock);
 		INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
+		INIT_DELAYED_WORK(&pag->pag_inactive_work, xfs_inactive_worker);
 		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
 		if (xfs_buf_hash_init(pag))
 			goto out_free_pag;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 51f88b56bbbe..87a62b0543ec 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -162,7 +162,6 @@ typedef struct xfs_mount {
 	atomic_t		m_active_trans;	/* number trans frozen */
 	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
 	struct delayed_work	m_reclaim_work;	/* background inode reclaim */
-	struct delayed_work	m_inactive_work; /* background inode inactive */
 	bool			m_update_sb;	/* sb needs update in mount */
 	int64_t			m_low_space[XFS_LOWSP_MAX];
 						/* low free space thresholds */
@@ -366,6 +365,9 @@ typedef struct xfs_perag {
 	/* background prealloc block trimming */
 	struct delayed_work	pag_blockgc_work;
 
+	/* background inode inactivation */
+	struct delayed_work	pag_inactive_work;
+
 	/* reference count */
 	uint8_t			pagf_refcount_level;
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 14c5d002c358..fced499ecdc9 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -521,7 +521,8 @@ xfs_init_mount_workqueues(
 		goto out_destroy_eofb;
 
 	mp->m_inactive_workqueue = alloc_workqueue("xfs-inactive/%s",
-			WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, mp->m_super->s_id);
+			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_FREEZABLE, 0,
+			mp->m_super->s_id);
 	if (!mp->m_inactive_workqueue)
 		goto out_destroy_sync;
 
@@ -1449,6 +1450,7 @@ xfs_configure_background_workqueues(
 
 	max_active = min_t(unsigned int, max_active, WQ_UNBOUND_MAX_ACTIVE);
 	workqueue_set_max_active(mp->m_blockgc_workqueue, max_active);
+	workqueue_set_max_active(mp->m_inactive_workqueue, max_active);
 }
 
 static int
@@ -1856,7 +1858,6 @@ static int xfs_init_fs_context(
 	atomic_set(&mp->m_active_trans, 0);
 	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
 	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
-	INIT_DELAYED_WORK(&mp->m_inactive_work, xfs_inactive_worker);
 	mp->m_kobj.kobject.kset = xfs_kset;
 	/*
 	 * We don't create the finobt per-ag space reservation until after log


  parent reply	other threads:[~2020-01-01  1:09 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-01  1:08 [PATCH v2 00/10] xfs: deferred inode inactivation Darrick J. Wong
2020-01-01  1:08 ` [PATCH 01/10] xfs: decide if inode needs inactivation Darrick J. Wong
2020-01-01  1:08 ` [PATCH 02/10] xfs: track unlinked inactive inode fs summary counters Darrick J. Wong
2020-01-01  1:08 ` [PATCH 03/10] xfs: track unlinked inactive inode quota counters Darrick J. Wong
2020-01-01  1:09 ` [PATCH 04/10] xfs: pass per-ag structure to the xfs_ici_walk execute function Darrick J. Wong
2020-01-01  1:09 ` [PATCH 05/10] xfs: pass around xfs_inode_ag_walk iget/irele helper functions Darrick J. Wong
2020-01-01  1:09 ` [PATCH 06/10] xfs: deferred inode inactivation Darrick J. Wong
2020-01-01  1:09 ` [PATCH 07/10] xfs: force inode inactivation and retry fs writes when there isn't space Darrick J. Wong
2020-01-01  1:09 ` [PATCH 08/10] xfs: force inactivation before fallocate when space is low Darrick J. Wong
2020-01-01  1:09 ` Darrick J. Wong [this message]
2020-01-01  1:09 ` [PATCH 10/10] xfs: create a polled function to force inode inactivation Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=157784097668.1362752.16785191645786207862.stgit@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.