Linux-XFS Archive on lore.kernel.org
 help / color / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: linux-xfs@vger.kernel.org
Subject: [PATCH 14/24] xfs: remove IO submission from xfs_reclaim_inode()
Date: Fri, 22 May 2020 13:50:19 +1000
Message-ID: <20200522035029.3022405-15-david@fromorbit.com> (raw)
In-Reply-To: <20200522035029.3022405-1-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

We no longer need to issue IO from shrinker based inode reclaim to
prevent spurious OOM killer invocation. This leaves only the global
filesystem management operations such as unmount needing to
writeback dirty inodes and reclaim them.

Instead of using the reclaim pass to write dirty inodes before
reclaiming them, use the AIL to push all the dirty inodes before we
try to reclaim them. This allows us to remove all the conditional
SYNC_WAIT locking and the writeback code from xfs_reclaim_inode()
and greatly simplify the checks we need to do to reclaim an inode.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_icache.c | 116 +++++++++++---------------------------------
 1 file changed, 29 insertions(+), 87 deletions(-)

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0f0f8fcd61b03..ee9bc82a0dfbe 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1130,24 +1130,17 @@ xfs_reclaim_inode_grab(
  *	dirty, async	=> requeue
  *	dirty, sync	=> flush, wait and reclaim
  */
-STATIC int
+static bool
 xfs_reclaim_inode(
 	struct xfs_inode	*ip,
 	struct xfs_perag	*pag,
 	int			sync_mode)
 {
-	struct xfs_buf		*bp = NULL;
 	xfs_ino_t		ino = ip->i_ino; /* for radix_tree_delete */
-	int			error;
 
-restart:
-	error = 0;
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	if (!xfs_iflock_nowait(ip)) {
-		if (!(sync_mode & SYNC_WAIT))
-			goto out;
-		xfs_iflock(ip);
-	}
+	if (!xfs_iflock_nowait(ip))
+		goto out;
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
 		xfs_iunpin_wait(ip);
@@ -1155,51 +1148,19 @@ xfs_reclaim_inode(
 		xfs_iflush_abort(ip);
 		goto reclaim;
 	}
-	if (xfs_ipincount(ip)) {
-		if (!(sync_mode & SYNC_WAIT))
-			goto out_ifunlock;
-		xfs_iunpin_wait(ip);
-	}
+	if (xfs_ipincount(ip))
+		goto out_ifunlock;
 	if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) {
 		xfs_ifunlock(ip);
 		goto reclaim;
 	}
 
-	/*
-	 * Never flush out dirty data during non-blocking reclaim, as it would
-	 * just contend with AIL pushing trying to do the same job.
-	 */
-	if (!(sync_mode & SYNC_WAIT))
-		goto out_ifunlock;
-
-	/*
-	 * Now we have an inode that needs flushing.
-	 *
-	 * Note that xfs_iflush will never block on the inode buffer lock, as
-	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
-	 * ip->i_lock, and we are doing the exact opposite here.  As a result,
-	 * doing a blocking xfs_imap_to_bp() to get the cluster buffer would
-	 * result in an ABBA deadlock with xfs_ifree_cluster().
-	 *
-	 * As xfs_ifree_cluser() must gather all inodes that are active in the
-	 * cache to mark them stale, if we hit this case we don't actually want
-	 * to do IO here - we want the inode marked stale so we can simply
-	 * reclaim it.  Hence if we get an EAGAIN error here,  just unlock the
-	 * inode, back off and try again.  Hopefully the next pass through will
-	 * see the stale flag set on the inode.
-	 */
-	error = xfs_iflush(ip, &bp);
-	if (error == -EAGAIN) {
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		/* backoff longer than in xfs_ifree_cluster */
-		delay(2);
-		goto restart;
-	}
-
-	if (!error) {
-		error = xfs_bwrite(bp);
-		xfs_buf_relse(bp);
-	}
+out_ifunlock:
+	xfs_ifunlock(ip);
+out:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_iflags_clear(ip, XFS_IRECLAIM);
+	return false;
 
 reclaim:
 	ASSERT(!xfs_isiflocked(ip));
@@ -1249,21 +1210,7 @@ xfs_reclaim_inode(
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
 	__xfs_inode_free(ip);
-	return error;
-
-out_ifunlock:
-	xfs_ifunlock(ip);
-out:
-	xfs_iflags_clear(ip, XFS_IRECLAIM);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	/*
-	 * We could return -EAGAIN here to make reclaim rescan the inode tree in
-	 * a short while. However, this just burns CPU time scanning the tree
-	 * waiting for IO to complete and the reclaim work never goes back to
-	 * the idle state. Instead, return 0 to let the next scheduled
-	 * background reclaim attempt to reclaim the inode again.
-	 */
-	return 0;
+	return true;
 }
 
 /*
@@ -1272,20 +1219,17 @@ xfs_reclaim_inode(
  * then a shut down during filesystem unmount reclaim walk leak all the
  * unreclaimed inodes.
  */
-STATIC int
+static int
 xfs_reclaim_inodes_ag(
 	struct xfs_mount	*mp,
 	int			flags,
 	int			*nr_to_scan)
 {
 	struct xfs_perag	*pag;
-	int			error = 0;
-	int			last_error = 0;
 	xfs_agnumber_t		ag;
 	int			trylock = flags & SYNC_TRYLOCK;
 	int			skipped;
 
-restart:
 	ag = 0;
 	skipped = 0;
 	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
@@ -1359,9 +1303,8 @@ xfs_reclaim_inodes_ag(
 			for (i = 0; i < nr_found; i++) {
 				if (!batch[i])
 					continue;
-				error = xfs_reclaim_inode(batch[i], pag, flags);
-				if (error && last_error != -EFSCORRUPTED)
-					last_error = error;
+				if (!xfs_reclaim_inode(batch[i], pag, flags))
+					skipped++;
 			}
 
 			*nr_to_scan -= XFS_LOOKUP_BATCH;
@@ -1377,19 +1320,7 @@ xfs_reclaim_inodes_ag(
 		mutex_unlock(&pag->pag_ici_reclaim_lock);
 		xfs_perag_put(pag);
 	}
-
-	/*
-	 * if we skipped any AG, and we still have scan count remaining, do
-	 * another pass this time using blocking reclaim semantics (i.e
-	 * waiting on the reclaim locks and ignoring the reclaim cursors). This
-	 * ensure that when we get more reclaimers than AGs we block rather
-	 * than spin trying to execute reclaim.
-	 */
-	if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
-		trylock = 0;
-		goto restart;
-	}
-	return last_error;
+	return skipped;
 }
 
 int
@@ -1398,8 +1329,18 @@ xfs_reclaim_inodes(
 	int		mode)
 {
 	int		nr_to_scan = INT_MAX;
+	int		skipped;
 
-	return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
+	skipped = xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
+	if (!(mode & SYNC_WAIT))
+		return 0;
+
+	do {
+		xfs_ail_push_all_sync(mp->m_ail);
+		skipped = xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
+	} while (skipped > 0);
+
+	return 0;
 }
 
 /*
@@ -1420,7 +1361,8 @@ xfs_reclaim_inodes_nr(
 	xfs_reclaim_work_queue(mp);
 	xfs_ail_push_all(mp->m_ail);
 
-	return xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+	xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+	return 0;
 }
 
 /*
-- 
2.26.2.761.g0e0b3e54be


  parent reply index

Thread overview: 91+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-22  3:50 [PATCH 00/24] xfs: rework inode flushing to make inode reclaim fully asynchronous Dave Chinner
2020-05-22  3:50 ` [PATCH 01/24] xfs: remove logged flag from inode log item Dave Chinner
2020-05-22  7:25   ` Christoph Hellwig
2020-05-22 21:13   ` Darrick J. Wong
2020-05-22  3:50 ` [PATCH 02/24] xfs: add an inode item lock Dave Chinner
2020-05-22  6:45   ` Amir Goldstein
2020-05-22 21:24   ` Darrick J. Wong
2020-05-23  8:45   ` Christoph Hellwig
2020-05-22  3:50 ` [PATCH 03/24] xfs: mark inode buffers in cache Dave Chinner
2020-05-22  7:45   ` Amir Goldstein
2020-05-22 21:35   ` Darrick J. Wong
2020-05-24 23:41     ` Dave Chinner
2020-05-23  8:48   ` Christoph Hellwig
2020-05-25  0:06     ` Dave Chinner
2020-05-22  3:50 ` [PATCH 04/24] xfs: mark dquot " Dave Chinner
2020-05-22  7:46   ` Amir Goldstein
2020-05-22 21:38   ` Darrick J. Wong
2020-05-22  3:50 ` [PATCH 05/24] xfs: mark log recovery buffers for completion Dave Chinner
2020-05-22  7:41   ` Amir Goldstein
2020-05-24 23:54     ` Dave Chinner
2020-05-22 21:41   ` Darrick J. Wong
2020-05-22  3:50 ` [PATCH 06/24] xfs: call xfs_buf_iodone directly Dave Chinner
2020-05-22  7:56   ` Amir Goldstein
2020-05-22 21:53   ` Darrick J. Wong
2020-05-22  3:50 ` [PATCH 07/24] xfs: clean up whacky buffer log item list reinit Dave Chinner
2020-05-22 22:01   ` Darrick J. Wong
2020-05-23  8:50   ` Christoph Hellwig
2020-05-22  3:50 ` [PATCH 08/24] xfs: fold xfs_istale_done into xfs_iflush_done Dave Chinner
2020-05-22 22:10   ` Darrick J. Wong
2020-05-23  9:12   ` Christoph Hellwig
2020-05-22  3:50 ` [PATCH 09/24] xfs: use direct calls for dquot IO completion Dave Chinner
2020-05-22 22:13   ` Darrick J. Wong
2020-05-23  9:16   ` Christoph Hellwig
2020-05-22  3:50 ` [PATCH 10/24] xfs: clean up the buffer iodone callback functions Dave Chinner
2020-05-22 22:26   ` Darrick J. Wong
2020-05-25  0:37     ` Dave Chinner
2020-05-23  9:19   ` Christoph Hellwig
2020-05-22  3:50 ` [PATCH 11/24] xfs: get rid of log item callbacks Dave Chinner
2020-05-22 22:27   ` Darrick J. Wong
2020-05-23  9:19   ` Christoph Hellwig
2020-05-22  3:50 ` [PATCH 12/24] xfs: pin inode backing buffer to the inode log item Dave Chinner
2020-05-22 22:39   ` Darrick J. Wong
2020-05-23  9:34   ` Christoph Hellwig
2020-05-23 21:43     ` Dave Chinner
2020-05-24  5:31       ` Christoph Hellwig
2020-05-24 23:13         ` Dave Chinner
2020-05-22  3:50 ` [PATCH 13/24] xfs: make inode reclaim almost non-blocking Dave Chinner
2020-05-22 12:19   ` Amir Goldstein
2020-05-22 22:48   ` Darrick J. Wong
2020-05-23 22:29     ` Dave Chinner
2020-05-22  3:50 ` Dave Chinner [this message]
2020-05-22 23:06   ` [PATCH 14/24] xfs: remove IO submission from xfs_reclaim_inode() Darrick J. Wong
2020-05-25  3:49     ` Dave Chinner
2020-05-23  9:40   ` Christoph Hellwig
2020-05-23 22:35     ` Dave Chinner
2020-05-22  3:50 ` [PATCH 15/24] xfs: allow multiple reclaimers per AG Dave Chinner
2020-05-22 23:10   ` Darrick J. Wong
2020-05-23 22:35     ` Dave Chinner
2020-05-22  3:50 ` [PATCH 16/24] xfs: don't block inode reclaim on the ILOCK Dave Chinner
2020-05-22 23:11   ` Darrick J. Wong
2020-05-22  3:50 ` [PATCH 17/24] xfs: remove SYNC_TRYLOCK from inode reclaim Dave Chinner
2020-05-22 23:14   ` Darrick J. Wong
2020-05-23 22:42     ` Dave Chinner
2020-05-22  3:50 ` [PATCH 18/24] xfs: clean up inode reclaim comments Dave Chinner
2020-05-22 23:17   ` Darrick J. Wong
2020-05-22  3:50 ` [PATCH 19/24] xfs: attach inodes to the cluster buffer when dirtied Dave Chinner
2020-05-22 23:48   ` Darrick J. Wong
2020-05-23 22:59     ` Dave Chinner
2020-05-22  3:50 ` [PATCH 20/24] xfs: xfs_iflush() is no longer necessary Dave Chinner
2020-05-22 23:54   ` Darrick J. Wong
2020-05-22  3:50 ` [PATCH 21/24] xfs: rename xfs_iflush_int() Dave Chinner
2020-05-22 12:33   ` Amir Goldstein
2020-05-22 23:57   ` Darrick J. Wong
2020-05-22  3:50 ` [PATCH 22/24] xfs: rework xfs_iflush_cluster() dirty inode iteration Dave Chinner
2020-05-23  0:13   ` Darrick J. Wong
2020-05-23 23:14     ` Dave Chinner
2020-05-23 11:31   ` Christoph Hellwig
2020-05-23 23:23     ` Dave Chinner
2020-05-24  5:32       ` Christoph Hellwig
2020-05-23 11:39   ` Christoph Hellwig
2020-05-22  3:50 ` [PATCH 23/24] xfs: factor xfs_iflush_done Dave Chinner
2020-05-23  0:20   ` Darrick J. Wong
2020-05-23 11:35   ` Christoph Hellwig
2020-05-22  3:50 ` [PATCH 24/24] xfs: remove xfs_inobp_check() Dave Chinner
2020-05-23  0:16   ` Darrick J. Wong
2020-05-23 11:36   ` Christoph Hellwig
2020-05-22  4:04 ` [PATCH 00/24] xfs: rework inode flushing to make inode reclaim fully asynchronous Dave Chinner
2020-05-23 16:18   ` Darrick J. Wong
2020-05-23 21:22     ` Dave Chinner
2020-05-22  6:18 ` Amir Goldstein
2020-05-22 12:01   ` Amir Goldstein

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200522035029.3022405-15-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-XFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-xfs/0 linux-xfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-xfs linux-xfs/ https://lore.kernel.org/linux-xfs \
		linux-xfs@vger.kernel.org
	public-inbox-index linux-xfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-xfs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git