From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 02/10] xfs: track unlinked inactive inode fs summary counters
Date: Tue, 31 Dec 2019 17:08:52 -0800
Message-ID: <157784093263.1362752.14373360314662413051.stgit@magnolia>
In-Reply-To: <157784092020.1362752.15046503361741521784.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

Set up counters to track the number of inodes and blocks that would be
freed by inactivating unlinked inodes.  The deferred inactivation patch
later in this series will use these counters to hide the effects of
deferred processing, so that statfs reports the same free resources it
would if the inodes had been inactivated immediately.
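
As a concrete example (hypothetical numbers): unlinking a regular file
that holds 100 data-device blocks and deferring its inactivation bumps
m_iinactive by 1 and m_dinactive by 100.  statfs then folds those
pending counts back into its free counters:

	f_ffree            = f_files - (icount - ifree) + iinactive
	f_bfree, f_bavail += dinactive   (rinactive on the realtime device)

so userspace sees the same free space and inode counts it would have
seen had the inode been freed synchronously.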

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_inode.c |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_mount.h |    7 +++++++
 fs/xfs/xfs_super.c |   31 +++++++++++++++++++++++++++++-
 3 files changed, 91 insertions(+), 1 deletion(-)
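
[Illustrative test, not part of the patch: one way to observe the
intended behavior from userspace.  It assumes the current directory and
the target file are on the same XFS mount and that the file has no
other hard links.]

/*
 * Sketch: with this series applied, statvfs() should count an unlinked
 * file's inode and blocks as free immediately, even though the real
 * freeing happens later via deferred inactivation.
 */
#include <stdio.h>
#include <sys/statvfs.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	struct statvfs before, after;

	if (argc != 2)
		return 1;
	if (statvfs(".", &before) || unlink(argv[1]))
		return 1;
	if (statvfs(".", &after))
		return 1;

	/* f_bfree/f_ffree should already include the unlinked file's
	 * blocks and inode, even while inactivation is still pending. */
	printf("bfree %llu -> %llu, ffree %llu -> %llu\n",
	       (unsigned long long)before.f_bfree,
	       (unsigned long long)after.f_bfree,
	       (unsigned long long)before.f_ffree,
	       (unsigned long long)after.f_ffree);
	return 0;
}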


diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 097a89826ba7..2fe8f030ebb8 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1806,6 +1806,60 @@ xfs_inactive_ifree(
 	return 0;
 }
 
+/*
+ * Play some accounting tricks with deferred inactivation of unlinked inodes so
+ * that it looks like the inode got freed immediately.  The superblock
+ * maintains counts of the number of inodes, data blocks, and rt blocks that
+ * would be freed if we were to force inode inactivation.  These counts are
+ * added to the statfs free counters outside of the regular fdblocks/ifree
+ * counters.  If userspace actually demands those "free" resources we'll force
+ * an inactivation scan to free things for real.
+ *
+ * Note that we can safely skip the block accounting trickery for complicated
+ * situations (inode with blocks on both devices, inode block counts that seem
+ * wrong) since the worst that happens is that statfs resource usage decreases
+ * more slowly.
+ *
+ * A positive @direction sets up the accounting trick; a negative one
+ * undoes it.
+ */
+static inline void
+xfs_inode_iadjust(
+	struct xfs_inode	*ip,
+	int			direction)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_filblks_t		iblocks;
+	int64_t			inodes = 0;
+	int64_t			dblocks = 0;
+	int64_t			rblocks = 0;
+
+	ASSERT(direction != 0);
+
+	if (VFS_I(ip)->i_nlink == 0) {
+		inodes = 1;
+
+		iblocks = max_t(int64_t, 0, ip->i_d.di_nblocks +
+					    ip->i_delayed_blks);
+		if (!XFS_IS_REALTIME_INODE(ip))
+			dblocks = iblocks;
+		else if (!XFS_IFORK_Q(ip) ||
+			 XFS_IFORK_FORMAT(ip, XFS_ATTR_FORK) ==
+					XFS_DINODE_FMT_LOCAL)
+			rblocks = iblocks;
+	}
+
+	if (direction < 0) {
+		inodes = -inodes;
+		dblocks = -dblocks;
+		rblocks = -rblocks;
+	}
+
+	percpu_counter_add(&mp->m_iinactive, inodes);
+	percpu_counter_add(&mp->m_dinactive, dblocks);
+	percpu_counter_add(&mp->m_rinactive, rblocks);
+}
+
 /*
  * Returns true if we need to update the on-disk metadata before we can free
  * the memory used by this inode.  Updates include freeing post-eof
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 296223c2b782..d203c922dc51 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -85,6 +85,13 @@ typedef struct xfs_mount {
 	 */
 	struct percpu_counter	m_delalloc_blks;
 
+	/* Count of inodes waiting for inactivation. */
+	struct percpu_counter	m_iinactive;
+	/* Count of data device blocks waiting for inactivation. */
+	struct percpu_counter	m_dinactive;
+	/* Count of realtime device blocks waiting for inactivation. */
+	struct percpu_counter	m_rinactive;
+
 	struct xfs_buf		*m_sb_bp;	/* buffer for superblock */
 	char			*m_rtname;	/* realtime device name */
 	char			*m_logname;	/* external log device name */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 03d95bf0952c..ed10ba2cd087 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -784,6 +784,8 @@ xfs_fs_statfs(
 	uint64_t		icount;
 	uint64_t		ifree;
 	uint64_t		fdblocks;
+	uint64_t		iinactive;
+	uint64_t		binactive;
 	xfs_extlen_t		lsize;
 	int64_t			ffree;
 
@@ -797,6 +799,7 @@ xfs_fs_statfs(
 	icount = percpu_counter_sum(&mp->m_icount);
 	ifree = percpu_counter_sum(&mp->m_ifree);
 	fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+	iinactive = percpu_counter_sum(&mp->m_iinactive);
 
 	spin_lock(&mp->m_sb_lock);
 	statp->f_bsize = sbp->sb_blocksize;
@@ -820,7 +823,7 @@ xfs_fs_statfs(
 					sbp->sb_icount);
 
 	/* make sure statp->f_ffree does not underflow */
-	ffree = statp->f_files - (icount - ifree);
+	ffree = statp->f_files - (icount - ifree) + iinactive;
 	statp->f_ffree = max_t(int64_t, ffree, 0);
 
 
@@ -834,7 +837,12 @@ xfs_fs_statfs(
 		statp->f_blocks = sbp->sb_rblocks;
 		statp->f_bavail = statp->f_bfree =
 			sbp->sb_frextents * sbp->sb_rextsize;
+		binactive = percpu_counter_sum(&mp->m_rinactive);
+	} else {
+		binactive = percpu_counter_sum(&mp->m_dinactive);
 	}
+	statp->f_bavail += binactive;
+	statp->f_bfree += binactive;
 
 	return 0;
 }
@@ -1024,8 +1032,26 @@ xfs_init_percpu_counters(
 	if (error)
 		goto free_fdblocks;
 
+	error = percpu_counter_init(&mp->m_iinactive, 0, GFP_KERNEL);
+	if (error)
+		goto free_delalloc;
+
+	error = percpu_counter_init(&mp->m_dinactive, 0, GFP_KERNEL);
+	if (error)
+		goto free_iinactive;
+
+	error = percpu_counter_init(&mp->m_rinactive, 0, GFP_KERNEL);
+	if (error)
+		goto free_dinactive;
+
 	return 0;
 
+free_dinactive:
+	percpu_counter_destroy(&mp->m_dinactive);
+free_iinactive:
+	percpu_counter_destroy(&mp->m_iinactive);
+free_delalloc:
+	percpu_counter_destroy(&mp->m_delalloc_blks);
 free_fdblocks:
 	percpu_counter_destroy(&mp->m_fdblocks);
 free_ifree:
@@ -1054,6 +1080,9 @@ xfs_destroy_percpu_counters(
 	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
 	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
 	percpu_counter_destroy(&mp->m_delalloc_blks);
+	percpu_counter_destroy(&mp->m_iinactive);
+	percpu_counter_destroy(&mp->m_dinactive);
+	percpu_counter_destroy(&mp->m_rinactive);
 }
 
 static void

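(Aside for reviewers new to the error-unwind style used in
xfs_init_percpu_counters() above, a minimal standalone sketch of the
same goto pattern -- illustrative only, not kernel code.  Each failure
label releases exactly the resources set up before the failure point:

#include <stdlib.h>

struct counters { long *a, *b, *c; };

int counters_init(struct counters *cs)
{
	cs->a = calloc(1, sizeof(*cs->a));
	if (!cs->a)
		goto fail;
	cs->b = calloc(1, sizeof(*cs->b));
	if (!cs->b)
		goto free_a;
	cs->c = calloc(1, sizeof(*cs->c));
	if (!cs->c)
		goto free_b;
	return 0;

free_b:
	free(cs->b);
free_a:
	free(cs->a);
fail:
	return -1;
}

A later failure thus unwinds every earlier init in reverse order, which
is exactly how the m_iinactive/m_dinactive/m_rinactive inits chain into
the existing free_delalloc/free_fdblocks labels.)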


Thread overview: 11+ messages
2020-01-01  1:08 [PATCH v2 00/10] xfs: deferred inode inactivation Darrick J. Wong
2020-01-01  1:08 ` [PATCH 01/10] xfs: decide if inode needs inactivation Darrick J. Wong
2020-01-01  1:08 ` [PATCH 02/10] xfs: track unlinked inactive inode fs summary counters Darrick J. Wong [this message]
2020-01-01  1:08 ` [PATCH 03/10] xfs: track unlinked inactive inode quota counters Darrick J. Wong
2020-01-01  1:09 ` [PATCH 04/10] xfs: pass per-ag structure to the xfs_ici_walk execute function Darrick J. Wong
2020-01-01  1:09 ` [PATCH 05/10] xfs: pass around xfs_inode_ag_walk iget/irele helper functions Darrick J. Wong
2020-01-01  1:09 ` [PATCH 06/10] xfs: deferred inode inactivation Darrick J. Wong
2020-01-01  1:09 ` [PATCH 07/10] xfs: force inode inactivation and retry fs writes when there isn't space Darrick J. Wong
2020-01-01  1:09 ` [PATCH 08/10] xfs: force inactivation before fallocate when space is low Darrick J. Wong
2020-01-01  1:09 ` [PATCH 09/10] xfs: parallelize inode inactivation Darrick J. Wong
2020-01-01  1:09 ` [PATCH 10/10] xfs: create a polled function to force " Darrick J. Wong
