All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] xfs: collect errors from inodegc for unlinked inode recovery
@ 2023-05-30  0:19 Dave Chinner
  2023-05-30 19:00 ` kernel test robot
  2023-06-02  0:25 ` Darrick J. Wong
  0 siblings, 2 replies; 4+ messages in thread
From: Dave Chinner @ 2023-05-30  0:19 UTC (permalink / raw)
  To: linux-xfs

From: Dave Chinner <dchinner@redhat.com>

Unlinked list recovery requires errors removing the inode the from
the unlinked list get fed back to the main recovery loop. Now that
we offload the unlinking to the inodegc work, we don't get errors
being fed back when we trip over a corruption that prevents the
inode from being removed from the unlinked list.

This means we never clear the corrupt unlinked list bucket,
resulting in runtime operations eventually tripping over it and
shutting down.

Fix this by collecting inodegc worker errors and feed them
back to the flush caller. This is largely best effort - the only
context that really cares is log recovery, and it only flushes a
single inode at a time so we don't need complex synchronised
handling. Essentially the inodegc workers will capture the first
error that occurs and the next flush will gather them and clear
them. The flush itself will only report the first gathered error.

In the cases where callers can return errors, propagate the
collected inodegc flush error up the error handling chain.

In the case of inode unlinked list recovery, there are several
superfluous calls to flush queued unlinked inodes -
xlog_recover_iunlink_bucket() guarantees that it has flushed the
inodegc and collected errors before it returns. Hence nothing in the
calling path needs to run a flush, even when an error is returned.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_icache.c      | 46 ++++++++++++++++++++++++++++++++--------
 fs/xfs/xfs_icache.h      |  4 ++--
 fs/xfs/xfs_inode.c       | 18 +++++-----------
 fs/xfs/xfs_inode.h       |  2 +-
 fs/xfs/xfs_log_recover.c | 19 ++++++++---------
 fs/xfs/xfs_mount.h       |  1 +
 fs/xfs/xfs_super.c       |  1 +
 fs/xfs/xfs_trans.c       |  4 +++-
 8 files changed, 59 insertions(+), 36 deletions(-)

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0f60e301eb1f..453890942d9f 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -454,6 +454,27 @@ xfs_inodegc_queue_all(
 	return ret;
 }
 
+/* Wait for all queued work and collect errors */
+static int
+xfs_inodegc_wait_all(
+	struct xfs_mount	*mp)
+{
+	int			cpu;
+	int			error = 0;
+
+	flush_workqueue(mp->m_inodegc_wq);
+	for_each_online_cpu(cpu) {
+		struct xfs_inodegc	*gc;
+
+		gc = per_cpu_ptr(mp->m_inodegc, cpu);
+		if (gc->error && !error)
+			error = gc->error;
+		gc->error = 0;
+	}
+
+	return error;
+}
+
 /*
  * Check the validity of the inode we just found it the cache
  */
@@ -1491,15 +1512,14 @@ xfs_blockgc_free_space(
 	if (error)
 		return error;
 
-	xfs_inodegc_flush(mp);
-	return 0;
+	return xfs_inodegc_flush(mp);
 }
 
 /*
  * Reclaim all the free space that we can by scheduling the background blockgc
  * and inodegc workers immediately and waiting for them all to clear.
  */
-void
+int
 xfs_blockgc_flush_all(
 	struct xfs_mount	*mp)
 {
@@ -1520,7 +1540,7 @@ xfs_blockgc_flush_all(
 	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
 		flush_delayed_work(&pag->pag_blockgc_work);
 
-	xfs_inodegc_flush(mp);
+	return xfs_inodegc_flush(mp);
 }
 
 /*
@@ -1842,13 +1862,17 @@ xfs_inodegc_set_reclaimable(
  * This is the last chance to make changes to an otherwise unreferenced file
  * before incore reclamation happens.
  */
-static void
+static int
 xfs_inodegc_inactivate(
 	struct xfs_inode	*ip)
 {
+	int			error;
+
 	trace_xfs_inode_inactivating(ip);
-	xfs_inactive(ip);
+	error = xfs_inactive(ip);
 	xfs_inodegc_set_reclaimable(ip);
+	return error;
+
 }
 
 void
@@ -1880,8 +1904,12 @@ xfs_inodegc_worker(
 
 	WRITE_ONCE(gc->shrinker_hits, 0);
 	llist_for_each_entry_safe(ip, n, node, i_gclist) {
+		int	error;
+
 		xfs_iflags_set(ip, XFS_INACTIVATING);
-		xfs_inodegc_inactivate(ip);
+		error = xfs_inodegc_inactivate(ip);
+		if (error && !gc->error)
+			gc->error = error;
 	}
 
 	memalloc_nofs_restore(nofs_flag);
@@ -1905,13 +1933,13 @@ xfs_inodegc_push(
  * Force all currently queued inode inactivation work to run immediately and
  * wait for the work to finish.
  */
-void
+int
 xfs_inodegc_flush(
 	struct xfs_mount	*mp)
 {
 	xfs_inodegc_push(mp);
 	trace_xfs_inodegc_flush(mp, __return_address);
-	flush_workqueue(mp->m_inodegc_wq);
+	return xfs_inodegc_wait_all(mp);
 }
 
 /*
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 87910191a9dd..1dcdcb23796e 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -62,7 +62,7 @@ int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp,
 		unsigned int iwalk_flags);
 int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags);
 int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm);
-void xfs_blockgc_flush_all(struct xfs_mount *mp);
+int xfs_blockgc_flush_all(struct xfs_mount *mp);
 
 void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
 void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
@@ -80,7 +80,7 @@ void xfs_blockgc_start(struct xfs_mount *mp);
 
 void xfs_inodegc_worker(struct work_struct *work);
 void xfs_inodegc_push(struct xfs_mount *mp);
-void xfs_inodegc_flush(struct xfs_mount *mp);
+int xfs_inodegc_flush(struct xfs_mount *mp);
 void xfs_inodegc_stop(struct xfs_mount *mp);
 void xfs_inodegc_start(struct xfs_mount *mp);
 void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5808abab786c..c0d0466f3270 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1620,16 +1620,7 @@ xfs_inactive_ifree(
 	 */
 	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
 
-	/*
-	 * Just ignore errors at this point.  There is nothing we can do except
-	 * to try to keep going. Make sure it's not a silent error.
-	 */
-	error = xfs_trans_commit(tp);
-	if (error)
-		xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
-			__func__, error);
-
-	return 0;
+	return xfs_trans_commit(tp);
 }
 
 /*
@@ -1693,7 +1684,7 @@ xfs_inode_needs_inactive(
  * now be truncated.  Also, we clear all of the read-ahead state
  * kept for the inode here since the file is now closed.
  */
-void
+int
 xfs_inactive(
 	xfs_inode_t	*ip)
 {
@@ -1736,7 +1727,7 @@ xfs_inactive(
 		 * reference to the inode at this point anyways.
 		 */
 		if (xfs_can_free_eofblocks(ip, true))
-			xfs_free_eofblocks(ip);
+			error = xfs_free_eofblocks(ip);
 
 		goto out;
 	}
@@ -1773,7 +1764,7 @@ xfs_inactive(
 	/*
 	 * Free the inode.
 	 */
-	xfs_inactive_ifree(ip);
+	error = xfs_inactive_ifree(ip);
 
 out:
 	/*
@@ -1781,6 +1772,7 @@ xfs_inactive(
 	 * the attached dquots.
 	 */
 	xfs_qm_dqdetach(ip);
+	return error;
 }
 
 /*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 69d21e42c10a..7547caf2f2ab 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -470,7 +470,7 @@ enum layout_break_reason {
 	(xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID))
 
 int		xfs_release(struct xfs_inode *ip);
-void		xfs_inactive(struct xfs_inode *ip);
+int		xfs_inactive(struct xfs_inode *ip);
 int		xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name,
 			   struct xfs_inode **ipp, struct xfs_name *ci_name);
 int		xfs_create(struct mnt_idmap *idmap,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 322eb2ee6c55..82c81d20459d 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2711,7 +2711,9 @@ xlog_recover_iunlink_bucket(
 			 * just to flush the inodegc queue and wait for it to
 			 * complete.
 			 */
-			xfs_inodegc_flush(mp);
+			error = xfs_inodegc_flush(mp);
+			if (error)
+				break;
 		}
 
 		prev_agino = agino;
@@ -2719,10 +2721,15 @@ xlog_recover_iunlink_bucket(
 	}
 
 	if (prev_ip) {
+		int	error2;
+
 		ip->i_prev_unlinked = prev_agino;
 		xfs_irele(prev_ip);
+
+		error2 = xfs_inodegc_flush(mp);
+		if (error2 && !error)
+			return error2;
 	}
-	xfs_inodegc_flush(mp);
 	return error;
 }
 
@@ -2789,7 +2796,6 @@ xlog_recover_iunlink_ag(
 			 * bucket and remaining inodes on it unreferenced and
 			 * unfreeable.
 			 */
-			xfs_inodegc_flush(pag->pag_mount);
 			xlog_recover_clear_agi_bucket(pag, bucket);
 		}
 	}
@@ -2806,13 +2812,6 @@ xlog_recover_process_iunlinks(
 
 	for_each_perag(log->l_mp, agno, pag)
 		xlog_recover_iunlink_ag(pag);
-
-	/*
-	 * Flush the pending unlinked inodes to ensure that the inactivations
-	 * are fully completed on disk and the incore inodes can be reclaimed
-	 * before we signal that recovery is complete.
-	 */
-	xfs_inodegc_flush(log->l_mp);
 }
 
 STATIC void
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index aaaf5ec13492..6c09f89534d3 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -62,6 +62,7 @@ struct xfs_error_cfg {
 struct xfs_inodegc {
 	struct llist_head	list;
 	struct delayed_work	work;
+	int			error;
 
 	/* approximate count of inodes in the list */
 	unsigned int		items;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 7e706255f165..4120bd1cba90 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1100,6 +1100,7 @@ xfs_inodegc_init_percpu(
 #endif
 		init_llist_head(&gc->list);
 		gc->items = 0;
+		gc->error = 0;
 		INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
 	}
 	return 0;
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index f81fbf081b01..8c0bfc9a33b1 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -290,7 +290,9 @@ xfs_trans_alloc(
 		 * Do not perform a synchronous scan because callers can hold
 		 * other locks.
 		 */
-		xfs_blockgc_flush_all(mp);
+		error = xfs_blockgc_flush_all(mp);
+		if (error)
+			return error;
 		want_retry = false;
 		goto retry;
 	}
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] xfs: collect errors from inodegc for unlinked inode recovery
  2023-05-30  0:19 [PATCH] xfs: collect errors from inodegc for unlinked inode recovery Dave Chinner
@ 2023-05-30 19:00 ` kernel test robot
  2023-06-02  0:25 ` Darrick J. Wong
  1 sibling, 0 replies; 4+ messages in thread
From: kernel test robot @ 2023-05-30 19:00 UTC (permalink / raw)
  To: Dave Chinner, linux-xfs; +Cc: llvm, oe-kbuild-all

Hi Dave,

kernel test robot noticed the following build warnings:

[auto build test WARNING on xfs-linux/for-next]
[also build test WARNING on linus/master v6.4-rc4 next-20230530]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Dave-Chinner/xfs-collect-errors-from-inodegc-for-unlinked-inode-recovery/20230530-082000
base:   https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next
patch link:    https://lore.kernel.org/r/20230530001928.2967218-1-david%40fromorbit.com
patch subject: [PATCH] xfs: collect errors from inodegc for unlinked inode recovery
config: i386-randconfig-i051-20230530 (https://download.01.org/0day-ci/archive/20230531/202305310236.wMEgOWKO-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        mkdir -p ~/bin
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/7e4e87bdccf0e418d6083d636f4aca7aa145f2b9
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Dave-Chinner/xfs-collect-errors-from-inodegc-for-unlinked-inode-recovery/20230530-082000
        git checkout 7e4e87bdccf0e418d6083d636f4aca7aa145f2b9
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=i386 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash fs/xfs/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202305310236.wMEgOWKO-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> fs/xfs/xfs_inode.c:1729:7: warning: variable 'error' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
                   if (xfs_can_free_eofblocks(ip, true))
                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   fs/xfs/xfs_inode.c:1775:9: note: uninitialized use occurs here
           return error;
                  ^~~~~
   fs/xfs/xfs_inode.c:1729:3: note: remove the 'if' if its condition is always true
                   if (xfs_can_free_eofblocks(ip, true))
                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> fs/xfs/xfs_inode.c:1712:6: warning: variable 'error' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
           if (xfs_is_metadata_inode(ip))
               ^~~~~~~~~~~~~~~~~~~~~~~~~
   fs/xfs/xfs_inode.c:1775:9: note: uninitialized use occurs here
           return error;
                  ^~~~~
   fs/xfs/xfs_inode.c:1712:2: note: remove the 'if' if its condition is always false
           if (xfs_is_metadata_inode(ip))
           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   fs/xfs/xfs_inode.c:1708:6: warning: variable 'error' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
           if (xfs_is_readonly(mp))
               ^~~~~~~~~~~~~~~~~~~
   fs/xfs/xfs_inode.c:1775:9: note: uninitialized use occurs here
           return error;
                  ^~~~~
   fs/xfs/xfs_inode.c:1708:2: note: remove the 'if' if its condition is always false
           if (xfs_is_readonly(mp))
           ^~~~~~~~~~~~~~~~~~~~~~~~
   fs/xfs/xfs_inode.c:1699:6: warning: variable 'error' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
           if (VFS_I(ip)->i_mode == 0) {
               ^~~~~~~~~~~~~~~~~~~~~~
   fs/xfs/xfs_inode.c:1775:9: note: uninitialized use occurs here
           return error;
                  ^~~~~
   fs/xfs/xfs_inode.c:1699:2: note: remove the 'if' if its condition is always false
           if (VFS_I(ip)->i_mode == 0) {
           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   fs/xfs/xfs_inode.c:1692:13: note: initialize the variable 'error' to silence this warning
           int                     error;
                                        ^
                                         = 0
   4 warnings generated.


vim +1729 fs/xfs/xfs_inode.c

62af7d54a0ec0b Darrick J. Wong   2021-08-06  1678  
c24b5dfadc4a4f Dave Chinner      2013-08-12  1679  /*
c24b5dfadc4a4f Dave Chinner      2013-08-12  1680   * xfs_inactive
c24b5dfadc4a4f Dave Chinner      2013-08-12  1681   *
c24b5dfadc4a4f Dave Chinner      2013-08-12  1682   * This is called when the vnode reference count for the vnode
c24b5dfadc4a4f Dave Chinner      2013-08-12  1683   * goes to zero.  If the file has been unlinked, then it must
c24b5dfadc4a4f Dave Chinner      2013-08-12  1684   * now be truncated.  Also, we clear all of the read-ahead state
c24b5dfadc4a4f Dave Chinner      2013-08-12  1685   * kept for the inode here since the file is now closed.
c24b5dfadc4a4f Dave Chinner      2013-08-12  1686   */
7e4e87bdccf0e4 Dave Chinner      2023-05-30  1687  int
c24b5dfadc4a4f Dave Chinner      2013-08-12  1688  xfs_inactive(
c24b5dfadc4a4f Dave Chinner      2013-08-12  1689  	xfs_inode_t	*ip)
c24b5dfadc4a4f Dave Chinner      2013-08-12  1690  {
3d3c8b5222b924 Jie Liu           2013-08-12  1691  	struct xfs_mount	*mp;
c24b5dfadc4a4f Dave Chinner      2013-08-12  1692  	int			error;
c24b5dfadc4a4f Dave Chinner      2013-08-12  1693  	int			truncate = 0;
c24b5dfadc4a4f Dave Chinner      2013-08-12  1694  
c24b5dfadc4a4f Dave Chinner      2013-08-12  1695  	/*
c24b5dfadc4a4f Dave Chinner      2013-08-12  1696  	 * If the inode is already free, then there can be nothing
c24b5dfadc4a4f Dave Chinner      2013-08-12  1697  	 * to clean up here.
c24b5dfadc4a4f Dave Chinner      2013-08-12  1698  	 */
c19b3b05ae440d Dave Chinner      2016-02-09  1699  	if (VFS_I(ip)->i_mode == 0) {
c24b5dfadc4a4f Dave Chinner      2013-08-12  1700  		ASSERT(ip->i_df.if_broot_bytes == 0);
3ea06d73e3c02e Darrick J. Wong   2021-05-31  1701  		goto out;
c24b5dfadc4a4f Dave Chinner      2013-08-12  1702  	}
c24b5dfadc4a4f Dave Chinner      2013-08-12  1703  
c24b5dfadc4a4f Dave Chinner      2013-08-12  1704  	mp = ip->i_mount;
17c12bcd3030e4 Darrick J. Wong   2016-10-03  1705  	ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
c24b5dfadc4a4f Dave Chinner      2013-08-12  1706  
c24b5dfadc4a4f Dave Chinner      2013-08-12  1707  	/* If this is a read-only mount, don't do this (would generate I/O) */
2e973b2cd4cdb9 Dave Chinner      2021-08-18  1708  	if (xfs_is_readonly(mp))
3ea06d73e3c02e Darrick J. Wong   2021-05-31  1709  		goto out;
c24b5dfadc4a4f Dave Chinner      2013-08-12  1710  
383e32b0d0db46 Darrick J. Wong   2021-03-22  1711  	/* Metadata inodes require explicit resource cleanup. */
383e32b0d0db46 Darrick J. Wong   2021-03-22 @1712  	if (xfs_is_metadata_inode(ip))
3ea06d73e3c02e Darrick J. Wong   2021-05-31  1713  		goto out;
383e32b0d0db46 Darrick J. Wong   2021-03-22  1714  
6231848c3aa5c7 Darrick J. Wong   2018-03-06  1715  	/* Try to clean out the cow blocks if there are any. */
51d626903083f7 Christoph Hellwig 2018-07-17  1716  	if (xfs_inode_has_cow_data(ip))
6231848c3aa5c7 Darrick J. Wong   2018-03-06  1717  		xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
6231848c3aa5c7 Darrick J. Wong   2018-03-06  1718  
54d7b5c1d03e97 Dave Chinner      2016-02-09  1719  	if (VFS_I(ip)->i_nlink != 0) {
c24b5dfadc4a4f Dave Chinner      2013-08-12  1720  		/*
c24b5dfadc4a4f Dave Chinner      2013-08-12  1721  		 * force is true because we are evicting an inode from the
c24b5dfadc4a4f Dave Chinner      2013-08-12  1722  		 * cache. Post-eof blocks must be freed, lest we end up with
c24b5dfadc4a4f Dave Chinner      2013-08-12  1723  		 * broken free space accounting.
3b4683c294095b Brian Foster      2017-04-11  1724  		 *
3b4683c294095b Brian Foster      2017-04-11  1725  		 * Note: don't bother with iolock here since lockdep complains
3b4683c294095b Brian Foster      2017-04-11  1726  		 * about acquiring it in reclaim context. We have the only
3b4683c294095b Brian Foster      2017-04-11  1727  		 * reference to the inode at this point anyways.
c24b5dfadc4a4f Dave Chinner      2013-08-12  1728  		 */
3b4683c294095b Brian Foster      2017-04-11 @1729  		if (xfs_can_free_eofblocks(ip, true))
7e4e87bdccf0e4 Dave Chinner      2023-05-30  1730  			error = xfs_free_eofblocks(ip);
74564fb48cbfcb Brian Foster      2013-09-20  1731  
3ea06d73e3c02e Darrick J. Wong   2021-05-31  1732  		goto out;
c24b5dfadc4a4f Dave Chinner      2013-08-12  1733  	}
c24b5dfadc4a4f Dave Chinner      2013-08-12  1734  
c19b3b05ae440d Dave Chinner      2016-02-09  1735  	if (S_ISREG(VFS_I(ip)->i_mode) &&
13d2c10b05d8e6 Christoph Hellwig 2021-03-29  1736  	    (ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 ||
daf83964a3681c Christoph Hellwig 2020-05-18  1737  	     ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
c24b5dfadc4a4f Dave Chinner      2013-08-12  1738  		truncate = 1;
c24b5dfadc4a4f Dave Chinner      2013-08-12  1739  
c14cfccabe2af2 Darrick J. Wong   2018-05-04  1740  	error = xfs_qm_dqattach(ip);
c24b5dfadc4a4f Dave Chinner      2013-08-12  1741  	if (error)
3ea06d73e3c02e Darrick J. Wong   2021-05-31  1742  		goto out;
c24b5dfadc4a4f Dave Chinner      2013-08-12  1743  
c19b3b05ae440d Dave Chinner      2016-02-09  1744  	if (S_ISLNK(VFS_I(ip)->i_mode))
36b21dde6e899d Brian Foster      2013-09-20  1745  		error = xfs_inactive_symlink(ip);
f7be2d7f594cbc Brian Foster      2013-09-20  1746  	else if (truncate)
f7be2d7f594cbc Brian Foster      2013-09-20  1747  		error = xfs_inactive_truncate(ip);
36b21dde6e899d Brian Foster      2013-09-20  1748  	if (error)
3ea06d73e3c02e Darrick J. Wong   2021-05-31  1749  		goto out;
c24b5dfadc4a4f Dave Chinner      2013-08-12  1750  
c24b5dfadc4a4f Dave Chinner      2013-08-12  1751  	/*
c24b5dfadc4a4f Dave Chinner      2013-08-12  1752  	 * If there are attributes associated with the file then blow them away
c24b5dfadc4a4f Dave Chinner      2013-08-12  1753  	 * now.  The code calls a routine that recursively deconstructs the
6dfe5a049f2d48 Dave Chinner      2015-05-29  1754  	 * attribute fork. If also blows away the in-core attribute fork.
c24b5dfadc4a4f Dave Chinner      2013-08-12  1755  	 */
932b42c66cb5d0 Darrick J. Wong   2022-07-09  1756  	if (xfs_inode_has_attr_fork(ip)) {
c24b5dfadc4a4f Dave Chinner      2013-08-12  1757  		error = xfs_attr_inactive(ip);
c24b5dfadc4a4f Dave Chinner      2013-08-12  1758  		if (error)
3ea06d73e3c02e Darrick J. Wong   2021-05-31  1759  			goto out;
f7be2d7f594cbc Brian Foster      2013-09-20  1760  	}
f7be2d7f594cbc Brian Foster      2013-09-20  1761  
7821ea302dca72 Christoph Hellwig 2021-03-29  1762  	ASSERT(ip->i_forkoff == 0);
c24b5dfadc4a4f Dave Chinner      2013-08-12  1763  
c24b5dfadc4a4f Dave Chinner      2013-08-12  1764  	/*
c24b5dfadc4a4f Dave Chinner      2013-08-12  1765  	 * Free the inode.
c24b5dfadc4a4f Dave Chinner      2013-08-12  1766  	 */
7e4e87bdccf0e4 Dave Chinner      2023-05-30  1767  	error = xfs_inactive_ifree(ip);
c24b5dfadc4a4f Dave Chinner      2013-08-12  1768  
3ea06d73e3c02e Darrick J. Wong   2021-05-31  1769  out:
c24b5dfadc4a4f Dave Chinner      2013-08-12  1770  	/*
3ea06d73e3c02e Darrick J. Wong   2021-05-31  1771  	 * We're done making metadata updates for this inode, so we can release
3ea06d73e3c02e Darrick J. Wong   2021-05-31  1772  	 * the attached dquots.
c24b5dfadc4a4f Dave Chinner      2013-08-12  1773  	 */
c24b5dfadc4a4f Dave Chinner      2013-08-12  1774  	xfs_qm_dqdetach(ip);
7e4e87bdccf0e4 Dave Chinner      2023-05-30  1775  	return error;
c24b5dfadc4a4f Dave Chinner      2013-08-12  1776  }
c24b5dfadc4a4f Dave Chinner      2013-08-12  1777  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] xfs: collect errors from inodegc for unlinked inode recovery
  2023-05-30  0:19 [PATCH] xfs: collect errors from inodegc for unlinked inode recovery Dave Chinner
  2023-05-30 19:00 ` kernel test robot
@ 2023-06-02  0:25 ` Darrick J. Wong
  2023-06-02  4:01   ` Dave Chinner
  1 sibling, 1 reply; 4+ messages in thread
From: Darrick J. Wong @ 2023-06-02  0:25 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

On Tue, May 30, 2023 at 10:19:28AM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Unlinked list recovery requires errors removing the inode the from
> the unlinked list get fed back to the main recovery loop. Now that
> we offload the unlinking to the inodegc work, we don't get errors
> being fed back when we trip over a corruption that prevents the
> inode from being removed from the unlinked list.
> 
> This means we never clear the corrupt unlinked list bucket,
> resulting in runtime operations eventually tripping over it and
> shutting down.
> 
> Fix this by collecting inodegc worker errors and feed them
> back to the flush caller. This is largely best effort - the only
> context that really cares is log recovery, and it only flushes a
> single inode at a time so we don't need complex synchronised
> handling. Essentially the inodegc workers will capture the first
> error that occurs and the next flush will gather them and clear
> them. The flush itself will only report the first gathered error.
> 
> In the cases where callers can return errors, propagate the
> collected inodegc flush error up the error handling chain.
> 
> In the case of inode unlinked list recovery, there are several
> superfluous calls to flush queued unlinked inodes -
> xlog_recover_iunlink_bucket() guarantees that it has flushed the
> inodegc and collected errors before it returns. Hence nothing in the
> calling path needs to run a flush, even when an error is returned.

Hmm.  So I guess what you're saying is that xfs_inactive now returns
negative errnos, and everything that calls down to that function will
pass the error upwards through the stack?

Any of those call paths that already could handle a negative errno will
now fail on a corrupt inactive inode; and the only place that will
silently "drop" the negative errno is unmount?

If 'yes' and 'yes' and the kbuild robot warnings get fixed,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_icache.c      | 46 ++++++++++++++++++++++++++++++++--------
>  fs/xfs/xfs_icache.h      |  4 ++--
>  fs/xfs/xfs_inode.c       | 18 +++++-----------
>  fs/xfs/xfs_inode.h       |  2 +-
>  fs/xfs/xfs_log_recover.c | 19 ++++++++---------
>  fs/xfs/xfs_mount.h       |  1 +
>  fs/xfs/xfs_super.c       |  1 +
>  fs/xfs/xfs_trans.c       |  4 +++-
>  8 files changed, 59 insertions(+), 36 deletions(-)
> 
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index 0f60e301eb1f..453890942d9f 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -454,6 +454,27 @@ xfs_inodegc_queue_all(
>  	return ret;
>  }
>  
> +/* Wait for all queued work and collect errors */
> +static int
> +xfs_inodegc_wait_all(
> +	struct xfs_mount	*mp)
> +{
> +	int			cpu;
> +	int			error = 0;
> +
> +	flush_workqueue(mp->m_inodegc_wq);
> +	for_each_online_cpu(cpu) {
> +		struct xfs_inodegc	*gc;
> +
> +		gc = per_cpu_ptr(mp->m_inodegc, cpu);
> +		if (gc->error && !error)
> +			error = gc->error;
> +		gc->error = 0;
> +	}
> +
> +	return error;
> +}
> +
>  /*
>   * Check the validity of the inode we just found it the cache
>   */
> @@ -1491,15 +1512,14 @@ xfs_blockgc_free_space(
>  	if (error)
>  		return error;
>  
> -	xfs_inodegc_flush(mp);
> -	return 0;
> +	return xfs_inodegc_flush(mp);
>  }
>  
>  /*
>   * Reclaim all the free space that we can by scheduling the background blockgc
>   * and inodegc workers immediately and waiting for them all to clear.
>   */
> -void
> +int
>  xfs_blockgc_flush_all(
>  	struct xfs_mount	*mp)
>  {
> @@ -1520,7 +1540,7 @@ xfs_blockgc_flush_all(
>  	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
>  		flush_delayed_work(&pag->pag_blockgc_work);
>  
> -	xfs_inodegc_flush(mp);
> +	return xfs_inodegc_flush(mp);
>  }
>  
>  /*
> @@ -1842,13 +1862,17 @@ xfs_inodegc_set_reclaimable(
>   * This is the last chance to make changes to an otherwise unreferenced file
>   * before incore reclamation happens.
>   */
> -static void
> +static int
>  xfs_inodegc_inactivate(
>  	struct xfs_inode	*ip)
>  {
> +	int			error;
> +
>  	trace_xfs_inode_inactivating(ip);
> -	xfs_inactive(ip);
> +	error = xfs_inactive(ip);
>  	xfs_inodegc_set_reclaimable(ip);
> +	return error;
> +
>  }
>  
>  void
> @@ -1880,8 +1904,12 @@ xfs_inodegc_worker(
>  
>  	WRITE_ONCE(gc->shrinker_hits, 0);
>  	llist_for_each_entry_safe(ip, n, node, i_gclist) {
> +		int	error;
> +
>  		xfs_iflags_set(ip, XFS_INACTIVATING);
> -		xfs_inodegc_inactivate(ip);
> +		error = xfs_inodegc_inactivate(ip);
> +		if (error && !gc->error)
> +			gc->error = error;
>  	}
>  
>  	memalloc_nofs_restore(nofs_flag);
> @@ -1905,13 +1933,13 @@ xfs_inodegc_push(
>   * Force all currently queued inode inactivation work to run immediately and
>   * wait for the work to finish.
>   */
> -void
> +int
>  xfs_inodegc_flush(
>  	struct xfs_mount	*mp)
>  {
>  	xfs_inodegc_push(mp);
>  	trace_xfs_inodegc_flush(mp, __return_address);
> -	flush_workqueue(mp->m_inodegc_wq);
> +	return xfs_inodegc_wait_all(mp);
>  }
>  
>  /*
> diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
> index 87910191a9dd..1dcdcb23796e 100644
> --- a/fs/xfs/xfs_icache.h
> +++ b/fs/xfs/xfs_icache.h
> @@ -62,7 +62,7 @@ int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp,
>  		unsigned int iwalk_flags);
>  int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags);
>  int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm);
> -void xfs_blockgc_flush_all(struct xfs_mount *mp);
> +int xfs_blockgc_flush_all(struct xfs_mount *mp);
>  
>  void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
>  void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
> @@ -80,7 +80,7 @@ void xfs_blockgc_start(struct xfs_mount *mp);
>  
>  void xfs_inodegc_worker(struct work_struct *work);
>  void xfs_inodegc_push(struct xfs_mount *mp);
> -void xfs_inodegc_flush(struct xfs_mount *mp);
> +int xfs_inodegc_flush(struct xfs_mount *mp);
>  void xfs_inodegc_stop(struct xfs_mount *mp);
>  void xfs_inodegc_start(struct xfs_mount *mp);
>  void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu);
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 5808abab786c..c0d0466f3270 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -1620,16 +1620,7 @@ xfs_inactive_ifree(
>  	 */
>  	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
>  
> -	/*
> -	 * Just ignore errors at this point.  There is nothing we can do except
> -	 * to try to keep going. Make sure it's not a silent error.
> -	 */
> -	error = xfs_trans_commit(tp);
> -	if (error)
> -		xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
> -			__func__, error);
> -
> -	return 0;
> +	return xfs_trans_commit(tp);
>  }
>  
>  /*
> @@ -1693,7 +1684,7 @@ xfs_inode_needs_inactive(
>   * now be truncated.  Also, we clear all of the read-ahead state
>   * kept for the inode here since the file is now closed.
>   */
> -void
> +int
>  xfs_inactive(
>  	xfs_inode_t	*ip)
>  {
> @@ -1736,7 +1727,7 @@ xfs_inactive(
>  		 * reference to the inode at this point anyways.
>  		 */
>  		if (xfs_can_free_eofblocks(ip, true))
> -			xfs_free_eofblocks(ip);
> +			error = xfs_free_eofblocks(ip);
>  
>  		goto out;
>  	}
> @@ -1773,7 +1764,7 @@ xfs_inactive(
>  	/*
>  	 * Free the inode.
>  	 */
> -	xfs_inactive_ifree(ip);
> +	error = xfs_inactive_ifree(ip);
>  
>  out:
>  	/*
> @@ -1781,6 +1772,7 @@ xfs_inactive(
>  	 * the attached dquots.
>  	 */
>  	xfs_qm_dqdetach(ip);
> +	return error;
>  }
>  
>  /*
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 69d21e42c10a..7547caf2f2ab 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -470,7 +470,7 @@ enum layout_break_reason {
>  	(xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID))
>  
>  int		xfs_release(struct xfs_inode *ip);
> -void		xfs_inactive(struct xfs_inode *ip);
> +int		xfs_inactive(struct xfs_inode *ip);
>  int		xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name,
>  			   struct xfs_inode **ipp, struct xfs_name *ci_name);
>  int		xfs_create(struct mnt_idmap *idmap,
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 322eb2ee6c55..82c81d20459d 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2711,7 +2711,9 @@ xlog_recover_iunlink_bucket(
>  			 * just to flush the inodegc queue and wait for it to
>  			 * complete.
>  			 */
> -			xfs_inodegc_flush(mp);
> +			error = xfs_inodegc_flush(mp);
> +			if (error)
> +				break;
>  		}
>  
>  		prev_agino = agino;
> @@ -2719,10 +2721,15 @@ xlog_recover_iunlink_bucket(
>  	}
>  
>  	if (prev_ip) {
> +		int	error2;
> +
>  		ip->i_prev_unlinked = prev_agino;
>  		xfs_irele(prev_ip);
> +
> +		error2 = xfs_inodegc_flush(mp);
> +		if (error2 && !error)
> +			return error2;
>  	}
> -	xfs_inodegc_flush(mp);
>  	return error;
>  }
>  
> @@ -2789,7 +2796,6 @@ xlog_recover_iunlink_ag(
>  			 * bucket and remaining inodes on it unreferenced and
>  			 * unfreeable.
>  			 */
> -			xfs_inodegc_flush(pag->pag_mount);
>  			xlog_recover_clear_agi_bucket(pag, bucket);
>  		}
>  	}
> @@ -2806,13 +2812,6 @@ xlog_recover_process_iunlinks(
>  
>  	for_each_perag(log->l_mp, agno, pag)
>  		xlog_recover_iunlink_ag(pag);
> -
> -	/*
> -	 * Flush the pending unlinked inodes to ensure that the inactivations
> -	 * are fully completed on disk and the incore inodes can be reclaimed
> -	 * before we signal that recovery is complete.
> -	 */
> -	xfs_inodegc_flush(log->l_mp);
>  }
>  
>  STATIC void
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index aaaf5ec13492..6c09f89534d3 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -62,6 +62,7 @@ struct xfs_error_cfg {
>  struct xfs_inodegc {
>  	struct llist_head	list;
>  	struct delayed_work	work;
> +	int			error;
>  
>  	/* approximate count of inodes in the list */
>  	unsigned int		items;
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index 7e706255f165..4120bd1cba90 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -1100,6 +1100,7 @@ xfs_inodegc_init_percpu(
>  #endif
>  		init_llist_head(&gc->list);
>  		gc->items = 0;
> +		gc->error = 0;
>  		INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
>  	}
>  	return 0;
> diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
> index f81fbf081b01..8c0bfc9a33b1 100644
> --- a/fs/xfs/xfs_trans.c
> +++ b/fs/xfs/xfs_trans.c
> @@ -290,7 +290,9 @@ xfs_trans_alloc(
>  		 * Do not perform a synchronous scan because callers can hold
>  		 * other locks.
>  		 */
> -		xfs_blockgc_flush_all(mp);
> +		error = xfs_blockgc_flush_all(mp);
> +		if (error)
> +			return error;
>  		want_retry = false;
>  		goto retry;
>  	}
> -- 
> 2.40.1
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] xfs: collect errors from inodegc for unlinked inode recovery
  2023-06-02  0:25 ` Darrick J. Wong
@ 2023-06-02  4:01   ` Dave Chinner
  0 siblings, 0 replies; 4+ messages in thread
From: Dave Chinner @ 2023-06-02  4:01 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Thu, Jun 01, 2023 at 05:25:00PM -0700, Darrick J. Wong wrote:
> On Tue, May 30, 2023 at 10:19:28AM +1000, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > Unlinked list recovery requires errors removing the inode the from
> > the unlinked list get fed back to the main recovery loop. Now that
> > we offload the unlinking to the inodegc work, we don't get errors
> > being fed back when we trip over a corruption that prevents the
> > inode from being removed from the unlinked list.
> > 
> > This means we never clear the corrupt unlinked list bucket,
> > resulting in runtime operations eventually tripping over it and
> > shutting down.
> > 
> > Fix this by collecting inodegc worker errors and feed them
> > back to the flush caller. This is largely best effort - the only
> > context that really cares is log recovery, and it only flushes a
> > single inode at a time so we don't need complex synchronised
> > handling. Essentially the inodegc workers will capture the first
> > error that occurs and the next flush will gather them and clear
> > them. The flush itself will only report the first gathered error.
> > 
> > In the cases where callers can return errors, propagate the
> > collected inodegc flush error up the error handling chain.
> > 
> > In the case of inode unlinked list recovery, there are several
> > superfluous calls to flush queued unlinked inodes -
> > xlog_recover_iunlink_bucket() guarantees that it has flushed the
> > inodegc and collected errors before it returns. Hence nothing in the
> > calling path needs to run a flush, even when an error is returned.
> 
> Hmm.  So I guess what you're saying is that xfs_inactive now returns
> negative errnos, and everything that calls down to that function will
> pass the error upwards through the stack?

Yes. Effectively inodegc workers capture the errors from
xfs_inactive(), and the next xfs_inode_flush() call will gather the
errors, clear them and report them to the caller. It's then up to
the caller to decide what to do with an error from
xfs_inode_flush()...

> Any of those call paths that already could handle a negative errno will
> now fail on a corrupt inactive inode; and the only place that will
> silently "drop" the negative errno is unmount?

Yes. That is the largely the what I've tried to do. The main thing
was to make the unlinked inode error reporting work, and having
other paths (like remount,ro) also be able to fail and propagate
errors is a bonus.

> If 'yes' and 'yes' and the kbuild robot warnings get fixed,
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>

Thanks!

-Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-06-02  4:01 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-30  0:19 [PATCH] xfs: collect errors from inodegc for unlinked inode recovery Dave Chinner
2023-05-30 19:00 ` kernel test robot
2023-06-02  0:25 ` Darrick J. Wong
2023-06-02  4:01   ` Dave Chinner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.