All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 08/21] xfs: defer iput on certain inodes while scrub / repair are running
Date: Sun, 24 Jun 2018 12:24:20 -0700	[thread overview]
Message-ID: <152986826018.3155.9241833069276452949.stgit@magnolia> (raw)
In-Reply-To: <152986820984.3155.16417868536016544528.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

Destroying an incore inode sometimes requires some work to be done on
the inode.  For example, post-EOF blocks on a non-PREALLOC inode are
trimmed, and copy-on-write staging extents are freed.  This work is done
in separate transactions, which is bad for scrub and repair because (a)
we already have a transaction and can't nest them, and (b) if we've
frozen the filesystem for scrub/repair work, that (regular) transaction
allocation will block on the freeze.

Therefore, if we detect that work has to be done to destroy the incore
inode, we'll just hang on to the reference until after the scrub is
finished.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/scrub/common.c |   52 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/common.h |    1 +
 fs/xfs/scrub/dir.c    |    2 +-
 fs/xfs/scrub/parent.c |    6 +++---
 fs/xfs/scrub/scrub.c  |   20 +++++++++++++++++++
 fs/xfs/scrub/scrub.h  |    9 ++++++++
 fs/xfs/scrub/trace.h  |   30 ++++++++++++++++++++++++++++
 7 files changed, 116 insertions(+), 4 deletions(-)


diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index c1132a40a366..9740c28384b6 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -22,6 +22,7 @@
 #include "xfs_alloc_btree.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_bmap_util.h"
 #include "xfs_ialloc.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_refcount.h"
@@ -890,3 +891,54 @@ xfs_scrub_ilock_inverted(
 	}
 	return -EDEADLOCK;
 }
+
+/*
+ * Release a reference to an inode while the fs is running a scrub or repair.
+ * If we anticipate that destroying the incore inode will require work to be
+ * done, we'll defer the iput until after the scrub/repair releases the
+ * transaction.
+ */
+void
+xfs_scrub_iput(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	/*
+	 * There are no other users of i_private in XFS so if it's non-NULL
+	 * this inode is already on the deferred iput list.  That list entry
+	 * keeps the inode referenced until teardown, so this extra reference
+	 * can be released right away.
+	 */
+	if (VFS_I(ip)->i_private)
+		goto iput;
+
+	/*
+	 * Any real/unwritten extents in the CoW fork will have to be freed
+	 * when the incore inode is destroyed, so defer if there are any.
+	 */
+	if (xfs_inode_has_cow_blocks(ip))
+		goto defer;
+
+	/*
+	 * Blocks after the end of the file will have to be freed if the file
+	 * is eligible for post-EOF trimming, so defer if there are any.
+	 */
+	if (xfs_can_free_eofblocks(ip, true) &&
+	    xfs_inode_has_posteof_blocks(ip))
+		goto defer;
+
+iput:
+	/* No deferral needed; release the reference immediately. */
+	trace_xfs_scrub_iput_now(ip, __return_address);
+	iput(VFS_I(ip));
+	return;
+
+defer:
+	/*
+	 * Park the reference on the deferred iput list, which is threaded
+	 * through i_private, until the scrub/repair operation is torn down.
+	 */
+	trace_xfs_scrub_iput_defer(ip, __return_address);
+	VFS_I(ip)->i_private = sc->deferred_iput_list;
+	sc->deferred_iput_list = VFS_I(ip);
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 2172bd5361e2..ca9e15af2a4f 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -140,5 +140,6 @@ static inline bool xfs_scrub_skip_xref(struct xfs_scrub_metadata *sm)
 
 int xfs_scrub_metadata_inode_forks(struct xfs_scrub_context *sc);
 int xfs_scrub_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
+void xfs_scrub_iput(struct xfs_scrub_context *sc, struct xfs_inode *ip);
 
 #endif	/* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 86324775fc9b..5cb371576732 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -87,7 +87,7 @@ xfs_scrub_dir_check_ftype(
 			xfs_mode_to_ftype(VFS_I(ip)->i_mode));
 	if (ino_dtype != dtype)
 		xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
-	iput(VFS_I(ip));
+	xfs_scrub_iput(sdc->sc, ip);
 out:
 	return error;
 }
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index e2bda58c32f0..fd0b2bfb8f18 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -230,11 +230,11 @@ xfs_scrub_parent_validate(
 
 	/* Drat, parent changed.  Try again! */
 	if (dnum != dp->i_ino) {
-		iput(VFS_I(dp));
+		xfs_scrub_iput(sc, dp);
 		*try_again = true;
 		return 0;
 	}
-	iput(VFS_I(dp));
+	xfs_scrub_iput(sc, dp);
 
 	/*
 	 * '..' didn't change, so check that there was only one entry
@@ -247,7 +247,7 @@ xfs_scrub_parent_validate(
 out_unlock:
 	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
 out_rele:
-	iput(VFS_I(dp));
+	xfs_scrub_iput(sc, dp);
 out:
 	return error;
 }
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index fec0e130f19e..b66cfbc56a34 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -157,6 +157,24 @@ xfs_scrub_probe(
 
 /* Scrub setup and teardown */
 
+/* Release all references to inodes we encountered needing deferred iput. */
+STATIC void
+xfs_scrub_iput_deferred(
+	struct xfs_scrub_context	*sc)
+{
+	struct inode			*pos = sc->deferred_iput_list;
+	struct inode			*after;
+
+	/* Follow the i_private chain until we hit the sentinel (sc). */
+	for (; pos != (struct inode *)sc; pos = after) {
+		after = pos->i_private;
+		pos->i_private = NULL;
+		trace_xfs_scrub_iput_deferred(XFS_I(pos), __return_address);
+		iput(pos);
+	}
+	sc->deferred_iput_list = sc;
+}
+
 /* Free all the resources and finish the transactions. */
 STATIC int
 xfs_scrub_teardown(
@@ -180,6 +198,7 @@ xfs_scrub_teardown(
 			iput(VFS_I(sc->ip));
 		sc->ip = NULL;
 	}
+	xfs_scrub_iput_deferred(sc);
 	if (sc->has_quotaofflock)
 		mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
 	if (sc->buf) {
@@ -506,6 +525,7 @@ xfs_scrub_metadata(
 	sc.ops = &meta_scrub_ops[sm->sm_type];
 	sc.try_harder = try_harder;
 	sc.sa.agno = NULLAGNUMBER;
+	sc.deferred_iput_list = &sc;
 	error = sc.ops->setup(&sc, ip);
 	if (error)
 		goto out_teardown;
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index b295edd5fc0e..69eee2ffed29 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -65,6 +65,15 @@ struct xfs_scrub_context {
 	bool				try_harder;
 	bool				has_quotaofflock;
 
+	/*
+	 * List of inodes which cannot be released (by scrub) until after the
+	 * scrub operation concludes because we'd have to do some work to the
+	 * inode to destroy its incore representation (cow blocks, posteof
+	 * blocks, etc.).  Each inode's i_private points to the next inode, or
+	 * to the scrub context as a sentinel for the end of the list.
+	 */
+	void				*deferred_iput_list;
+
 	/* State tracking for single-AG operations. */
 	struct xfs_scrub_ag		sa;
 };
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index cec3e5ece5a1..a050a00fc258 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -480,6 +480,36 @@ TRACE_EVENT(xfs_scrub_xref_error,
 		  __entry->ret_ip)
 );
 
+DECLARE_EVENT_CLASS(xfs_scrub_iref_class,
+	TP_PROTO(struct xfs_inode *ip, xfs_failaddr_t caller_ip),
+	TP_ARGS(ip, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)	/* s_dev of the inode's superblock */
+		__field(xfs_ino_t, ino)
+		__field(int, count)	/* i_count read when the event fires */
+		__field(xfs_failaddr_t, caller_ip)	/* passed in by caller */
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->count = atomic_read(&VFS_I(ip)->i_count);
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d ino 0x%llx count %d caller %pS",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->count,
+		  __entry->caller_ip)
+)
+
+#define DEFINE_SCRUB_IREF_EVENT(name) \
+DEFINE_EVENT(xfs_scrub_iref_class, name, \
+	TP_PROTO(struct xfs_inode *ip, xfs_failaddr_t caller_ip), \
+	TP_ARGS(ip, caller_ip))
+DEFINE_SCRUB_IREF_EVENT(xfs_scrub_iput_deferred);	/* deferred ref dropped */
+DEFINE_SCRUB_IREF_EVENT(xfs_scrub_iput_defer);	/* ref put on deferred list */
+DEFINE_SCRUB_IREF_EVENT(xfs_scrub_iput_now);	/* ref dropped immediately */
+
 /* repair tracepoints */
 #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
 


  parent reply	other threads:[~2018-06-24 19:24 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-24 19:23 [PATCH v16 00/21] xfs-4.19: online repair support Darrick J. Wong
2018-06-24 19:23 ` [PATCH 01/21] xfs: don't assume a left rmap when allocating a new rmap Darrick J. Wong
2018-06-27  0:54   ` Dave Chinner
2018-06-28 21:11   ` Allison Henderson
2018-06-29 14:39     ` Darrick J. Wong
2018-06-24 19:23 ` [PATCH 02/21] xfs: add helper to decide if an inode has allocated cow blocks Darrick J. Wong
2018-06-27  1:02   ` Dave Chinner
2018-06-28 21:12   ` Allison Henderson
2018-06-24 19:23 ` [PATCH 03/21] xfs: refactor part of xfs_free_eofblocks Darrick J. Wong
2018-06-28 21:13   ` Allison Henderson
2018-06-24 19:23 ` [PATCH 04/21] xfs: repair the AGF and AGFL Darrick J. Wong
2018-06-27  2:19   ` Dave Chinner
2018-06-27 16:44     ` Allison Henderson
2018-06-27 23:37       ` Dave Chinner
2018-06-29 15:14         ` Darrick J. Wong
2018-06-28 17:25     ` Allison Henderson
2018-06-29 15:08       ` Darrick J. Wong
2018-06-28 21:14   ` Allison Henderson
2018-06-28 23:21     ` Dave Chinner
2018-06-29  1:35       ` Allison Henderson
2018-06-29 14:55         ` Darrick J. Wong
2018-06-24 19:24 ` [PATCH 05/21] xfs: repair the AGI Darrick J. Wong
2018-06-27  2:22   ` Dave Chinner
2018-06-28 21:15   ` Allison Henderson
2018-06-24 19:24 ` [PATCH 06/21] xfs: repair free space btrees Darrick J. Wong
2018-06-27  3:21   ` Dave Chinner
2018-07-04  2:15     ` Darrick J. Wong
2018-07-04  2:25       ` Dave Chinner
2018-06-30 17:36   ` Allison Henderson
2018-06-24 19:24 ` [PATCH 07/21] xfs: repair inode btrees Darrick J. Wong
2018-06-28  0:55   ` Dave Chinner
2018-07-04  2:22     ` Darrick J. Wong
2018-06-30 17:36   ` Allison Henderson
2018-06-30 18:30     ` Darrick J. Wong
2018-07-01  0:45       ` Allison Henderson
2018-06-24 19:24 ` Darrick J. Wong [this message]
2018-06-28 23:37   ` [PATCH 08/21] xfs: defer iput on certain inodes while scrub / repair are running Dave Chinner
2018-06-29 14:49     ` Darrick J. Wong
2018-06-24 19:24 ` [PATCH 09/21] xfs: finish our set of inode get/put tracepoints for scrub Darrick J. Wong
2018-06-24 19:24 ` [PATCH 10/21] xfs: introduce online scrub freeze Darrick J. Wong
2018-06-24 19:24 ` [PATCH 11/21] xfs: repair the rmapbt Darrick J. Wong
2018-07-03  5:32   ` Dave Chinner
2018-07-03 23:59     ` Darrick J. Wong
2018-07-04  8:44       ` Carlos Maiolino
2018-07-04 18:40         ` Darrick J. Wong
2018-07-04 23:21       ` Dave Chinner
2018-07-05  3:48         ` Darrick J. Wong
2018-07-05  7:03           ` Dave Chinner
2018-07-06  0:47             ` Darrick J. Wong
2018-07-06  1:08               ` Dave Chinner
2018-06-24 19:24 ` [PATCH 12/21] xfs: repair refcount btrees Darrick J. Wong
2018-07-03  5:50   ` Dave Chinner
2018-07-04  2:23     ` Darrick J. Wong
2018-06-24 19:24 ` [PATCH 13/21] xfs: repair inode records Darrick J. Wong
2018-07-03  6:17   ` Dave Chinner
2018-07-04  0:16     ` Darrick J. Wong
2018-07-04  1:03       ` Dave Chinner
2018-07-04  1:30         ` Darrick J. Wong
2018-06-24 19:24 ` [PATCH 14/21] xfs: zap broken inode forks Darrick J. Wong
2018-07-04  2:07   ` Dave Chinner
2018-07-04  3:26     ` Darrick J. Wong
2018-06-24 19:25 ` [PATCH 15/21] xfs: repair inode block maps Darrick J. Wong
2018-07-04  3:00   ` Dave Chinner
2018-07-04  3:41     ` Darrick J. Wong
2018-06-24 19:25 ` [PATCH 16/21] xfs: repair damaged symlinks Darrick J. Wong
2018-07-04  5:45   ` Dave Chinner
2018-07-04 18:45     ` Darrick J. Wong
2018-06-24 19:25 ` [PATCH 17/21] xfs: repair extended attributes Darrick J. Wong
2018-07-06  1:03   ` Dave Chinner
2018-07-06  3:10     ` Darrick J. Wong
2018-06-24 19:25 ` [PATCH 18/21] xfs: scrub should set preen if attr leaf has holes Darrick J. Wong
2018-06-29  2:52   ` Dave Chinner
2018-06-24 19:25 ` [PATCH 19/21] xfs: repair quotas Darrick J. Wong
2018-07-06  1:50   ` Dave Chinner
2018-07-06  3:16     ` Darrick J. Wong
2018-06-24 19:25 ` [PATCH 20/21] xfs: implement live quotacheck as part of quota repair Darrick J. Wong
2018-06-24 19:25 ` [PATCH 21/21] xfs: add online scrub/repair for superblock counters Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=152986826018.3155.9241833069276452949.stgit@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.