[PATCHSET 0/3] xfs_repair: set needsrepair when dirtying filesystems

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCHSET 0/3] xfs_repair: set needsrepair when dirtying filesystems
@ 2021-02-13  5:46 Darrick J. Wong
  2021-02-13  5:46 ` [PATCH 1/3] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem Darrick J. Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-13  5:46 UTC (permalink / raw)
  To: sandeen, djwong; +Cc: linux-xfs, bfoster

Hi all,

In the base needsrepair patchset, we set up the basics of handling the
new NEEDSREPAIR flag in xfsprogs.  This second series enables repair to
protect users against the system going down after repair begins to dirty
the filesystem but before it completes.  We start by adding a write hook
to the xfs_mount so that repair can intercept the first write to set
NEEDSREPAIR on the filesystem and the ondisk super, and finish off by
inserting error injection points into libxfs and xfs_repair so that
fstests can verify that this actually works.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=repair-set-needsrepair
---
 include/xfs_mount.h |    4 ++
 libxfs/init.c       |   68 +++++++++++++++++++++++++++++++++--
 libxfs/libxfs_io.h  |   19 ++++++++++
 libxfs/rdwr.c       |   10 +++++
 repair/globals.c    |    3 ++
 repair/globals.h    |    3 ++
 repair/progress.c   |    3 ++
 repair/xfs_repair.c |   99 +++++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 204 insertions(+), 5 deletions(-)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/3] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem
  2021-02-13  5:46 [PATCHSET 0/3] xfs_repair: set needsrepair when dirtying filesystems Darrick J. Wong
@ 2021-02-13  5:46 ` Darrick J. Wong
  2021-02-16 11:55   ` Brian Foster
  2021-02-13  5:46 ` [PATCH 2/3] libxfs: simulate system failure after a certain number of writes Darrick J. Wong
  2021-02-13  5:47 ` [PATCH 3/3] xfs_repair: add post-phase error injection points Darrick J. Wong
  2 siblings, 1 reply; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-13  5:46 UTC (permalink / raw)
  To: sandeen, djwong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <djwong@kernel.org>

Add a hook to the buffer cache so that xfs_repair can intercept the
first write to a V5 filesystem to set the NEEDSREPAIR flag.  In the
event that xfs_repair dirties the filesystem and goes down, this ensures
that the sysadmin will have to re-start repair before mounting.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 include/xfs_mount.h |    4 ++
 libxfs/rdwr.c       |    4 ++
 repair/xfs_repair.c |   95 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 103 insertions(+)


diff --git a/include/xfs_mount.h b/include/xfs_mount.h
index 75230ca5..f93a9f11 100644
--- a/include/xfs_mount.h
+++ b/include/xfs_mount.h
@@ -11,6 +11,8 @@ struct xfs_inode;
 struct xfs_buftarg;
 struct xfs_da_geometry;
 
+typedef void (*buf_writeback_fn)(struct xfs_buf *bp);
+
 /*
  * Define a user-level mount structure with all we need
  * in order to make use of the numerous XFS_* macros.
@@ -95,6 +97,8 @@ typedef struct xfs_mount {
 		int	qi_dqperchunk;
 	}			*m_quotainfo;
 
+	buf_writeback_fn	m_buf_writeback_fn;
+
 	/*
 	 * xlog is defined in libxlog and thus is not intialized by libxfs. This
 	 * allows an application to initialize and store a reference to the log
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index ac783ce3..ca272387 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -812,6 +812,10 @@ libxfs_bwrite(
 		return bp->b_error;
 	}
 
+	/* Trigger the writeback hook if there is one. */
+	if (bp->b_mount->m_buf_writeback_fn)
+		bp->b_mount->m_buf_writeback_fn(bp);
+
 	/*
 	 * clear any pre-existing error status on the buffer. This can occur if
 	 * the buffer is corrupt on disk and the repair process doesn't clear
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 90d1a95a..12e319ae 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -720,6 +720,8 @@ clear_needsrepair(
 	struct xfs_buf		*bp;
 	int			error;
 
+	mp->m_buf_writeback_fn = NULL;
+
 	/*
 	 * If we're going to clear NEEDSREPAIR, we need to make absolutely sure
 	 * that everything is ok with the ondisk filesystem.  Make sure any
@@ -751,6 +753,95 @@ clear_needsrepair(
 		libxfs_buf_relse(bp);
 }
 
+static void
+update_sb_crc_only(
+	struct xfs_buf		*bp)
+{
+	xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
+}
+
+/* Forcibly write the primary superblock with the NEEDSREPAIR flag set. */
+static void
+force_needsrepair(
+	struct xfs_mount	*mp)
+{
+	struct xfs_buf_ops	fake_ops;
+	struct xfs_buf		*bp;
+	int			error;
+
+	if (!xfs_sb_version_hascrc(&mp->m_sb) ||
+	    xfs_sb_version_needsrepair(&mp->m_sb))
+		return;
+
+	bp = libxfs_getsb(mp);
+	if (!bp || bp->b_error) {
+		do_log(
+	_("couldn't get superblock to set needsrepair, err=%d\n"),
+				bp ? bp->b_error : ENOMEM);
+		return;
+	} else {
+		/*
+		 * It's possible that we need to set NEEDSREPAIR before we've
+		 * had a chance to fix the summary counters in the primary sb.
+		 * With the exception of those counters, phase 1 already
+		 * ensured that the geometry makes sense.
+		 *
+		 * Bad summary counters in the primary super can cause the
+		 * write verifier to fail, so substitute a dummy that only sets
+		 * the CRC.  In the event of a crash, NEEDSREPAIR will prevent
+		 * the kernel from mounting our potentially damaged filesystem
+		 * until repair is run again, so it's ok to bypass the usual
+		 * verification in this one case.
+		 */
+		fake_ops = xfs_sb_buf_ops; /* struct copy */
+		fake_ops.verify_write = update_sb_crc_only;
+
+		mp->m_sb.sb_features_incompat |=
+				XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
+		libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
+
+		/* Force the primary super to disk immediately. */
+		bp->b_ops = &fake_ops;
+		error = -libxfs_bwrite(bp);
+		bp->b_ops = &xfs_sb_buf_ops;
+		if (error)
+			do_log(_("couldn't force needsrepair, err=%d\n"), error);
+	}
+	if (bp)
+		libxfs_buf_relse(bp);
+}
+
+/* Intercept the first write to the filesystem so we can set NEEDSREPAIR. */
+static void
+repair_capture_writeback(
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = bp->b_mount;
+	static pthread_mutex_t	wb_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+	/* Higher level code modifying a superblock must set NEEDSREPAIR. */
+	if (bp->b_ops == &xfs_sb_buf_ops || bp->b_bn == XFS_SB_DADDR)
+		return;
+
+	pthread_mutex_lock(&wb_mutex);
+
+	/*
+	 * If someone else already dropped the hook, then needsrepair has
+	 * already been set on the filesystem and we can unlock.
+	 */
+	if (mp->m_buf_writeback_fn != repair_capture_writeback)
+		goto unlock;
+
+	/*
+	 * If we get here, the buffer being written is not a superblock, and
+	 * needsrepair needs to be set.
+	 */
+	force_needsrepair(mp);
+	mp->m_buf_writeback_fn = NULL;
+unlock:
+	pthread_mutex_unlock(&wb_mutex);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -847,6 +938,10 @@ main(int argc, char **argv)
 	if (verbose > 2)
 		mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
 
+	/* Capture the first writeback so that we can set needsrepair. */
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		mp->m_buf_writeback_fn = repair_capture_writeback;
+
 	/*
 	 * set XFS-independent status vars from the mount/sb structure
 	 */


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/3] libxfs: simulate system failure after a certain number of writes
  2021-02-13  5:46 [PATCHSET 0/3] xfs_repair: set needsrepair when dirtying filesystems Darrick J. Wong
  2021-02-13  5:46 ` [PATCH 1/3] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem Darrick J. Wong
@ 2021-02-13  5:46 ` Darrick J. Wong
  2021-02-16 11:56   ` Brian Foster
  2021-02-13  5:47 ` [PATCH 3/3] xfs_repair: add post-phase error injection points Darrick J. Wong
  2 siblings, 1 reply; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-13  5:46 UTC (permalink / raw)
  To: sandeen, djwong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <djwong@kernel.org>

Add an error injection knob so that we can simulate system failure after
a certain number of disk writes.  This knob is being added so that we
can check repair's behavior after an arbitrary number of tests.

Set LIBXFS_DEBUG_WRITE_CRASH={ddev,logdev,rtdev}=nn in the environment
to make libxfs SIGKILL itself after nn writes to the data, log, or rt
devices.  Note that this only applies to xfs_buf writes and zero_range.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 libxfs/init.c      |   68 +++++++++++++++++++++++++++++++++++++++++++++++++---
 libxfs/libxfs_io.h |   19 +++++++++++++++
 libxfs/rdwr.c      |    6 ++++-
 3 files changed, 88 insertions(+), 5 deletions(-)


diff --git a/libxfs/init.c b/libxfs/init.c
index 8a8ce3c4..1ec83791 100644
--- a/libxfs/init.c
+++ b/libxfs/init.c
@@ -590,7 +590,8 @@ libxfs_initialize_perag(
 static struct xfs_buftarg *
 libxfs_buftarg_alloc(
 	struct xfs_mount	*mp,
-	dev_t			dev)
+	dev_t			dev,
+	unsigned long		write_fails)
 {
 	struct xfs_buftarg	*btp;
 
@@ -603,10 +604,29 @@ libxfs_buftarg_alloc(
 	btp->bt_mount = mp;
 	btp->bt_bdev = dev;
 	btp->flags = 0;
+	if (write_fails) {
+		btp->writes_left = write_fails;
+		btp->flags |= XFS_BUFTARG_INJECT_WRITE_FAIL;
+	}
+	pthread_mutex_init(&btp->lock, NULL);
 
 	return btp;
 }
 
+enum libxfs_write_failure_nums {
+	WF_DATA = 0,
+	WF_LOG,
+	WF_RT,
+	WF_MAX_OPTS,
+};
+
+static char *wf_opts[] = {
+	[WF_DATA]		= "ddev",
+	[WF_LOG]		= "logdev",
+	[WF_RT]			= "rtdev",
+	[WF_MAX_OPTS]		= NULL,
+};
+
 void
 libxfs_buftarg_init(
 	struct xfs_mount	*mp,
@@ -614,6 +634,46 @@ libxfs_buftarg_init(
 	dev_t			logdev,
 	dev_t			rtdev)
 {
+	char			*p = getenv("LIBXFS_DEBUG_WRITE_CRASH");
+	unsigned long		dfail = 0, lfail = 0, rfail = 0;
+
+	/* Simulate utility crash after a certain number of writes. */
+	while (p && *p) {
+		char *val;
+
+		switch (getsubopt(&p, wf_opts, &val)) {
+		case WF_DATA:
+			if (!val) {
+				fprintf(stderr,
+		_("ddev write fail requires a parameter\n"));
+				exit(1);
+			}
+			dfail = strtoul(val, NULL, 0);
+			break;
+		case WF_LOG:
+			if (!val) {
+				fprintf(stderr,
+		_("logdev write fail requires a parameter\n"));
+				exit(1);
+			}
+			lfail = strtoul(val, NULL, 0);
+			break;
+		case WF_RT:
+			if (!val) {
+				fprintf(stderr,
+		_("rtdev write fail requires a parameter\n"));
+				exit(1);
+			}
+			rfail = strtoul(val, NULL, 0);
+			break;
+		default:
+			fprintf(stderr, _("unknown write fail type %s\n"),
+					val);
+			exit(1);
+			break;
+		}
+	}
+
 	if (mp->m_ddev_targp) {
 		/* should already have all buftargs initialised */
 		if (mp->m_ddev_targp->bt_bdev != dev ||
@@ -647,12 +707,12 @@ libxfs_buftarg_init(
 		return;
 	}
 
-	mp->m_ddev_targp = libxfs_buftarg_alloc(mp, dev);
+	mp->m_ddev_targp = libxfs_buftarg_alloc(mp, dev, dfail);
 	if (!logdev || logdev == dev)
 		mp->m_logdev_targp = mp->m_ddev_targp;
 	else
-		mp->m_logdev_targp = libxfs_buftarg_alloc(mp, logdev);
-	mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, rtdev);
+		mp->m_logdev_targp = libxfs_buftarg_alloc(mp, logdev, lfail);
+	mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, rtdev, rfail);
 }
 
 /*
diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
index c80e2d59..85485257 100644
--- a/libxfs/libxfs_io.h
+++ b/libxfs/libxfs_io.h
@@ -22,6 +22,8 @@ struct xfs_perag;
  */
 struct xfs_buftarg {
 	struct xfs_mount	*bt_mount;
+	pthread_mutex_t		lock;
+	unsigned long		writes_left;
 	dev_t			bt_bdev;
 	unsigned int		flags;
 };
@@ -30,6 +32,23 @@ struct xfs_buftarg {
 #define XFS_BUFTARG_LOST_WRITE		(1 << 0)
 /* A dirty buffer failed the write verifier. */
 #define XFS_BUFTARG_CORRUPT_WRITE	(1 << 1)
+/* Simulate failure after a certain number of writes. */
+#define XFS_BUFTARG_INJECT_WRITE_FAIL	(1 << 2)
+
+/* Simulate the system crashing after a write. */
+static inline void
+xfs_buftarg_trip_write(
+	struct xfs_buftarg	*btp)
+{
+	if (!(btp->flags & XFS_BUFTARG_INJECT_WRITE_FAIL))
+		return;
+
+	pthread_mutex_lock(&btp->lock);
+	btp->writes_left--;
+	if (!btp->writes_left)
+		kill(getpid(), SIGKILL);
+	pthread_mutex_unlock(&btp->lock);
+}
 
 extern void	libxfs_buftarg_init(struct xfs_mount *mp, dev_t ddev,
 				    dev_t logdev, dev_t rtdev);
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index ca272387..fd456d6b 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -74,8 +74,10 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 	/* try to use special zeroing methods, fall back to writes if needed */
 	len_bytes = LIBXFS_BBTOOFF64(len);
 	error = platform_zero_range(fd, start_offset, len_bytes);
-	if (!error)
+	if (!error) {
+		xfs_buftarg_trip_write(btp);
 		return 0;
+	}
 
 	zsize = min(BDSTRAT_SIZE, BBTOB(len));
 	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
@@ -105,6 +107,7 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 				progname, __FUNCTION__);
 			exit(1);
 		}
+		xfs_buftarg_trip_write(btp);
 		offset += bytes;
 	}
 	free(z);
@@ -860,6 +863,7 @@ libxfs_bwrite(
 	} else {
 		bp->b_flags |= LIBXFS_B_UPTODATE;
 		bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED);
+		xfs_buftarg_trip_write(bp->b_target);
 	}
 	return bp->b_error;
 }


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/3] xfs_repair: add post-phase error injection points
  2021-02-13  5:46 [PATCHSET 0/3] xfs_repair: set needsrepair when dirtying filesystems Darrick J. Wong
  2021-02-13  5:46 ` [PATCH 1/3] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem Darrick J. Wong
  2021-02-13  5:46 ` [PATCH 2/3] libxfs: simulate system failure after a certain number of writes Darrick J. Wong
@ 2021-02-13  5:47 ` Darrick J. Wong
  2021-02-16 11:58   ` Brian Foster
  2 siblings, 1 reply; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-13  5:47 UTC (permalink / raw)
  To: sandeen, djwong; +Cc: linux-xfs, bfoster

From: Darrick J. Wong <djwong@kernel.org>

Create an error injection point so that we can simulate repair failing
after a certain phase.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 repair/globals.c    |    3 +++
 repair/globals.h    |    3 +++
 repair/progress.c   |    3 +++
 repair/xfs_repair.c |    4 ++++
 4 files changed, 13 insertions(+)


diff --git a/repair/globals.c b/repair/globals.c
index 110d98b6..537d068b 100644
--- a/repair/globals.c
+++ b/repair/globals.c
@@ -117,3 +117,6 @@ uint64_t	*prog_rpt_done;
 
 int		ag_stride;
 int		thread_count;
+
+/* If nonzero, simulate failure after this phase. */
+int		fail_after_phase;
diff --git a/repair/globals.h b/repair/globals.h
index 1d397b35..a9287320 100644
--- a/repair/globals.h
+++ b/repair/globals.h
@@ -162,4 +162,7 @@ extern uint64_t		*prog_rpt_done;
 extern int		ag_stride;
 extern int		thread_count;
 
+/* If nonzero, simulate failure after this phase. */
+extern int		fail_after_phase;
+
 #endif /* _XFS_REPAIR_GLOBAL_H */
diff --git a/repair/progress.c b/repair/progress.c
index e5a9c1ef..5bbe58ec 100644
--- a/repair/progress.c
+++ b/repair/progress.c
@@ -410,6 +410,9 @@ timestamp(int end, int phase, char *buf)
 		current_phase = phase;
 	}
 
+	if (fail_after_phase && phase == fail_after_phase)
+		kill(getpid(), SIGKILL);
+
 	if (buf) {
 		tmp = localtime((const time_t *)&now);
 		sprintf(buf, _("%02d:%02d:%02d"), tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 12e319ae..6b60b8f4 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -362,6 +362,10 @@ process_args(int argc, char **argv)
 
 	if (report_corrected && no_modify)
 		usage();
+
+	p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
+	if (p)
+		fail_after_phase = (int)strtol(p, NULL, 0);
 }
 
 void __attribute__((noreturn))


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem
  2021-02-13  5:46 ` [PATCH 1/3] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem Darrick J. Wong
@ 2021-02-16 11:55   ` Brian Foster
  2021-02-18  4:45     ` Darrick J. Wong
  0 siblings, 1 reply; 16+ messages in thread
From: Brian Foster @ 2021-02-16 11:55 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Fri, Feb 12, 2021 at 09:46:50PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Add a hook to the buffer cache so that xfs_repair can intercept the
> first write to a V5 filesystem to set the NEEDSREPAIR flag.  In the
> event that xfs_repair dirties the filesystem and goes down, this ensures
> that the sysadmin will have to re-start repair before mounting.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  include/xfs_mount.h |    4 ++
>  libxfs/rdwr.c       |    4 ++
>  repair/xfs_repair.c |   95 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 103 insertions(+)
> 
> 
...
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 90d1a95a..12e319ae 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -720,6 +720,8 @@ clear_needsrepair(
>  	struct xfs_buf		*bp;
>  	int			error;
>  
> +	mp->m_buf_writeback_fn = NULL;
> +

Slight impedence mismatch in that we set the callback unconditionally
(assuming crc=1) but we only get to this call if needsrepair is set.

>  	/*
>  	 * If we're going to clear NEEDSREPAIR, we need to make absolutely sure
>  	 * that everything is ok with the ondisk filesystem.  Make sure any
> @@ -751,6 +753,95 @@ clear_needsrepair(
>  		libxfs_buf_relse(bp);
>  }
>  
> +static void
> +update_sb_crc_only(
> +	struct xfs_buf		*bp)
> +{
> +	xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
> +}
> +
> +/* Forcibly write the primary superblock with the NEEDSREPAIR flag set. */
> +static void
> +force_needsrepair(
> +	struct xfs_mount	*mp)
> +{
> +	struct xfs_buf_ops	fake_ops;
> +	struct xfs_buf		*bp;
> +	int			error;
> +
> +	if (!xfs_sb_version_hascrc(&mp->m_sb) ||
> +	    xfs_sb_version_needsrepair(&mp->m_sb))
> +		return;
> +
> +	bp = libxfs_getsb(mp);
> +	if (!bp || bp->b_error) {
> +		do_log(
> +	_("couldn't get superblock to set needsrepair, err=%d\n"),
> +				bp ? bp->b_error : ENOMEM);
> +		return;
> +	} else {
> +		/*
> +		 * It's possible that we need to set NEEDSREPAIR before we've
> +		 * had a chance to fix the summary counters in the primary sb.
> +		 * With the exception of those counters, phase 1 already
> +		 * ensured that the geometry makes sense.
> +		 *
> +		 * Bad summary counters in the primary super can cause the
> +		 * write verifier to fail, so substitute a dummy that only sets
> +		 * the CRC.  In the event of a crash, NEEDSREPAIR will prevent
> +		 * the kernel from mounting our potentially damaged filesystem
> +		 * until repair is run again, so it's ok to bypass the usual
> +		 * verification in this one case.
> +		 */
> +		fake_ops = xfs_sb_buf_ops; /* struct copy */
> +		fake_ops.verify_write = update_sb_crc_only;
> +
> +		mp->m_sb.sb_features_incompat |=
> +				XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> +		libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
> +
> +		/* Force the primary super to disk immediately. */
> +		bp->b_ops = &fake_ops;
> +		error = -libxfs_bwrite(bp);

This looks like it calls back into the writeback hook before it's been
cleared. I'm guessing the xfs_sb_version_needsrepair() check above cuts
off any recursion issues, but it might be cleaner to clear the callback
pointer first.

> +		bp->b_ops = &xfs_sb_buf_ops;
> +		if (error)
> +			do_log(_("couldn't force needsrepair, err=%d\n"), error);
> +	}
> +	if (bp)
> +		libxfs_buf_relse(bp);

Doesn't appear we can get here with bp == NULL. Otherwise the rest looks
reasonable to me.

Brian

> +}
> +
> +/* Intercept the first write to the filesystem so we can set NEEDSREPAIR. */
> +static void
> +repair_capture_writeback(
> +	struct xfs_buf		*bp)
> +{
> +	struct xfs_mount	*mp = bp->b_mount;
> +	static pthread_mutex_t	wb_mutex = PTHREAD_MUTEX_INITIALIZER;
> +
> +	/* Higher level code modifying a superblock must set NEEDSREPAIR. */
> +	if (bp->b_ops == &xfs_sb_buf_ops || bp->b_bn == XFS_SB_DADDR)
> +		return;
> +
> +	pthread_mutex_lock(&wb_mutex);
> +
> +	/*
> +	 * If someone else already dropped the hook, then needsrepair has
> +	 * already been set on the filesystem and we can unlock.
> +	 */
> +	if (mp->m_buf_writeback_fn != repair_capture_writeback)
> +		goto unlock;
> +
> +	/*
> +	 * If we get here, the buffer being written is not a superblock, and
> +	 * needsrepair needs to be set.
> +	 */
> +	force_needsrepair(mp);
> +	mp->m_buf_writeback_fn = NULL;
> +unlock:
> +	pthread_mutex_unlock(&wb_mutex);
> +}
> +
>  int
>  main(int argc, char **argv)
>  {
> @@ -847,6 +938,10 @@ main(int argc, char **argv)
>  	if (verbose > 2)
>  		mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
>  
> +	/* Capture the first writeback so that we can set needsrepair. */
> +	if (xfs_sb_version_hascrc(&mp->m_sb))
> +		mp->m_buf_writeback_fn = repair_capture_writeback;
> +
>  	/*
>  	 * set XFS-independent status vars from the mount/sb structure
>  	 */
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] libxfs: simulate system failure after a certain number of writes
  2021-02-13  5:46 ` [PATCH 2/3] libxfs: simulate system failure after a certain number of writes Darrick J. Wong
@ 2021-02-16 11:56   ` Brian Foster
  2021-02-18  4:36     ` Darrick J. Wong
  0 siblings, 1 reply; 16+ messages in thread
From: Brian Foster @ 2021-02-16 11:56 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Fri, Feb 12, 2021 at 09:46:56PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Add an error injection knob so that we can simulate system failure after
> a certain number of disk writes.  This knob is being added so that we
> can check repair's behavior after an arbitrary number of tests.
> 
> Set LIBXFS_DEBUG_WRITE_CRASH={ddev,logdev,rtdev}=nn in the environment
> to make libxfs SIGKILL itself after nn writes to the data, log, or rt
> devices.  Note that this only applies to xfs_buf writes and zero_range.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  libxfs/init.c      |   68 +++++++++++++++++++++++++++++++++++++++++++++++++---
>  libxfs/libxfs_io.h |   19 +++++++++++++++
>  libxfs/rdwr.c      |    6 ++++-
>  3 files changed, 88 insertions(+), 5 deletions(-)
> 
> 
> diff --git a/libxfs/init.c b/libxfs/init.c
> index 8a8ce3c4..1ec83791 100644
> --- a/libxfs/init.c
> +++ b/libxfs/init.c
...
> @@ -614,6 +634,46 @@ libxfs_buftarg_init(
>  	dev_t			logdev,
>  	dev_t			rtdev)
>  {
> +	char			*p = getenv("LIBXFS_DEBUG_WRITE_CRASH");
> +	unsigned long		dfail = 0, lfail = 0, rfail = 0;

Was there a reason for using an environment variable now rather than the
original command line option?

> +
> +	/* Simulate utility crash after a certain number of writes. */
> +	while (p && *p) {
> +		char *val;
> +
> +		switch (getsubopt(&p, wf_opts, &val)) {
> +		case WF_DATA:
> +			if (!val) {
> +				fprintf(stderr,
> +		_("ddev write fail requires a parameter\n"));
> +				exit(1);
> +			}
> +			dfail = strtoul(val, NULL, 0);
> +			break;
> +		case WF_LOG:
> +			if (!val) {
> +				fprintf(stderr,
> +		_("logdev write fail requires a parameter\n"));
> +				exit(1);
> +			}
> +			lfail = strtoul(val, NULL, 0);
> +			break;
> +		case WF_RT:
> +			if (!val) {
> +				fprintf(stderr,
> +		_("rtdev write fail requires a parameter\n"));
> +				exit(1);
> +			}
> +			rfail = strtoul(val, NULL, 0);
> +			break;
> +		default:
> +			fprintf(stderr, _("unknown write fail type %s\n"),
> +					val);
> +			exit(1);
> +			break;
> +		}
> +	}
> +
>  	if (mp->m_ddev_targp) {
>  		/* should already have all buftargs initialised */
>  		if (mp->m_ddev_targp->bt_bdev != dev ||
...
> diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
> index c80e2d59..85485257 100644
> --- a/libxfs/libxfs_io.h
> +++ b/libxfs/libxfs_io.h
...
> @@ -30,6 +32,23 @@ struct xfs_buftarg {
>  #define XFS_BUFTARG_LOST_WRITE		(1 << 0)
>  /* A dirty buffer failed the write verifier. */
>  #define XFS_BUFTARG_CORRUPT_WRITE	(1 << 1)
> +/* Simulate failure after a certain number of writes. */
> +#define XFS_BUFTARG_INJECT_WRITE_FAIL	(1 << 2)
> +
> +/* Simulate the system crashing after a write. */
> +static inline void
> +xfs_buftarg_trip_write(
> +	struct xfs_buftarg	*btp)
> +{
> +	if (!(btp->flags & XFS_BUFTARG_INJECT_WRITE_FAIL))
> +		return;
> +
> +	pthread_mutex_lock(&btp->lock);
> +	btp->writes_left--;
> +	if (!btp->writes_left)
> +		kill(getpid(), SIGKILL);

Can we just exit()?

(Same questions for the next patch..)

Brian

> +	pthread_mutex_unlock(&btp->lock);
> +}
>  
>  extern void	libxfs_buftarg_init(struct xfs_mount *mp, dev_t ddev,
>  				    dev_t logdev, dev_t rtdev);
> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> index ca272387..fd456d6b 100644
> --- a/libxfs/rdwr.c
> +++ b/libxfs/rdwr.c
> @@ -74,8 +74,10 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
>  	/* try to use special zeroing methods, fall back to writes if needed */
>  	len_bytes = LIBXFS_BBTOOFF64(len);
>  	error = platform_zero_range(fd, start_offset, len_bytes);
> -	if (!error)
> +	if (!error) {
> +		xfs_buftarg_trip_write(btp);
>  		return 0;
> +	}
>  
>  	zsize = min(BDSTRAT_SIZE, BBTOB(len));
>  	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
> @@ -105,6 +107,7 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
>  				progname, __FUNCTION__);
>  			exit(1);
>  		}
> +		xfs_buftarg_trip_write(btp);
>  		offset += bytes;
>  	}
>  	free(z);
> @@ -860,6 +863,7 @@ libxfs_bwrite(
>  	} else {
>  		bp->b_flags |= LIBXFS_B_UPTODATE;
>  		bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED);
> +		xfs_buftarg_trip_write(bp->b_target);
>  	}
>  	return bp->b_error;
>  }
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] xfs_repair: add post-phase error injection points
  2021-02-13  5:47 ` [PATCH 3/3] xfs_repair: add post-phase error injection points Darrick J. Wong
@ 2021-02-16 11:58   ` Brian Foster
  2021-02-18  4:47     ` Darrick J. Wong
  0 siblings, 1 reply; 16+ messages in thread
From: Brian Foster @ 2021-02-16 11:58 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Fri, Feb 12, 2021 at 09:47:01PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Create an error injection point so that we can simulate repair failing
> after a certain phase.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  repair/globals.c    |    3 +++
>  repair/globals.h    |    3 +++
>  repair/progress.c   |    3 +++
>  repair/xfs_repair.c |    4 ++++
>  4 files changed, 13 insertions(+)
> 
> 
...
> diff --git a/repair/progress.c b/repair/progress.c
> index e5a9c1ef..5bbe58ec 100644
> --- a/repair/progress.c
> +++ b/repair/progress.c
> @@ -410,6 +410,9 @@ timestamp(int end, int phase, char *buf)
>  		current_phase = phase;
>  	}
>  
> +	if (fail_after_phase && phase == fail_after_phase)
> +		kill(getpid(), SIGKILL);
> +

It seems a little hacky to bury this in timestamp(). Perhaps we should
at least check for end == PHASE_END (even though PHASE_START is
currently only used in one place). Otherwise seems reasonable..

Brian

>  	if (buf) {
>  		tmp = localtime((const time_t *)&now);
>  		sprintf(buf, _("%02d:%02d:%02d"), tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 12e319ae..6b60b8f4 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -362,6 +362,10 @@ process_args(int argc, char **argv)
>  
>  	if (report_corrected && no_modify)
>  		usage();
> +
> +	p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
> +	if (p)
> +		fail_after_phase = (int)strtol(p, NULL, 0);
>  }
>  
>  void __attribute__((noreturn))
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] libxfs: simulate system failure after a certain number of writes
  2021-02-16 11:56   ` Brian Foster
@ 2021-02-18  4:36     ` Darrick J. Wong
  2021-02-18 13:02       ` Brian Foster
  0 siblings, 1 reply; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-18  4:36 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Tue, Feb 16, 2021 at 06:56:45AM -0500, Brian Foster wrote:
> On Fri, Feb 12, 2021 at 09:46:56PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Add an error injection knob so that we can simulate system failure after
> > a certain number of disk writes.  This knob is being added so that we
> > can check repair's behavior after an arbitrary number of tests.
> > 
> > Set LIBXFS_DEBUG_WRITE_CRASH={ddev,logdev,rtdev}=nn in the environment
> > to make libxfs SIGKILL itself after nn writes to the data, log, or rt
> > devices.  Note that this only applies to xfs_buf writes and zero_range.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  libxfs/init.c      |   68 +++++++++++++++++++++++++++++++++++++++++++++++++---
> >  libxfs/libxfs_io.h |   19 +++++++++++++++
> >  libxfs/rdwr.c      |    6 ++++-
> >  3 files changed, 88 insertions(+), 5 deletions(-)
> > 
> > 
> > diff --git a/libxfs/init.c b/libxfs/init.c
> > index 8a8ce3c4..1ec83791 100644
> > --- a/libxfs/init.c
> > +++ b/libxfs/init.c
> ...
> > @@ -614,6 +634,46 @@ libxfs_buftarg_init(
> >  	dev_t			logdev,
> >  	dev_t			rtdev)
> >  {
> > +	char			*p = getenv("LIBXFS_DEBUG_WRITE_CRASH");
> > +	unsigned long		dfail = 0, lfail = 0, rfail = 0;
> 
> Was there a reason for using an environment variable now rather than the
> original command line option?

Well, you said you wanted a generic write error injection hook for
libxfs, and this is the simplest way to add that, given that libraries
don't have a direct means to parse argc and argv.

I mean... this /could/ take the form of an exposed library function that
xfs utilities could opt into their own getopt loops, but that's even
/more/ infrastructure code that I'd have to write.

OTOH there's already precedent for magic environment variables to enable
libxfs debug hooks.

> > +
> > +	/* Simulate utility crash after a certain number of writes. */
> > +	while (p && *p) {
> > +		char *val;
> > +
> > +		switch (getsubopt(&p, wf_opts, &val)) {
> > +		case WF_DATA:
> > +			if (!val) {
> > +				fprintf(stderr,
> > +		_("ddev write fail requires a parameter\n"));
> > +				exit(1);
> > +			}
> > +			dfail = strtoul(val, NULL, 0);
> > +			break;
> > +		case WF_LOG:
> > +			if (!val) {
> > +				fprintf(stderr,
> > +		_("logdev write fail requires a parameter\n"));
> > +				exit(1);
> > +			}
> > +			lfail = strtoul(val, NULL, 0);
> > +			break;
> > +		case WF_RT:
> > +			if (!val) {
> > +				fprintf(stderr,
> > +		_("rtdev write fail requires a parameter\n"));
> > +				exit(1);
> > +			}
> > +			rfail = strtoul(val, NULL, 0);
> > +			break;
> > +		default:
> > +			fprintf(stderr, _("unknown write fail type %s\n"),
> > +					val);
> > +			exit(1);
> > +			break;
> > +		}
> > +	}
> > +
> >  	if (mp->m_ddev_targp) {
> >  		/* should already have all buftargs initialised */
> >  		if (mp->m_ddev_targp->bt_bdev != dev ||
> ...
> > diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
> > index c80e2d59..85485257 100644
> > --- a/libxfs/libxfs_io.h
> > +++ b/libxfs/libxfs_io.h
> ...
> > @@ -30,6 +32,23 @@ struct xfs_buftarg {
> >  #define XFS_BUFTARG_LOST_WRITE		(1 << 0)
> >  /* A dirty buffer failed the write verifier. */
> >  #define XFS_BUFTARG_CORRUPT_WRITE	(1 << 1)
> > +/* Simulate failure after a certain number of writes. */
> > +#define XFS_BUFTARG_INJECT_WRITE_FAIL	(1 << 2)
> > +
> > +/* Simulate the system crashing after a write. */
> > +static inline void
> > +xfs_buftarg_trip_write(
> > +	struct xfs_buftarg	*btp)
> > +{
> > +	if (!(btp->flags & XFS_BUFTARG_INJECT_WRITE_FAIL))
> > +		return;
> > +
> > +	pthread_mutex_lock(&btp->lock);
> > +	btp->writes_left--;
> > +	if (!btp->writes_left)
> > +		kill(getpid(), SIGKILL);
> 
> Can we just exit()?
> 
> (Same questions for the next patch..)

The goal of this generic write error injection framework is to simulate
total system crashes immediately after a write.

SIGKILL and exit are not the same, because atexit handlers don't run if
the process forcibly kills itself.

--D

> 
> Brian
> 
> > +	pthread_mutex_unlock(&btp->lock);
> > +}
> >  
> >  extern void	libxfs_buftarg_init(struct xfs_mount *mp, dev_t ddev,
> >  				    dev_t logdev, dev_t rtdev);
> > diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> > index ca272387..fd456d6b 100644
> > --- a/libxfs/rdwr.c
> > +++ b/libxfs/rdwr.c
> > @@ -74,8 +74,10 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
> >  	/* try to use special zeroing methods, fall back to writes if needed */
> >  	len_bytes = LIBXFS_BBTOOFF64(len);
> >  	error = platform_zero_range(fd, start_offset, len_bytes);
> > -	if (!error)
> > +	if (!error) {
> > +		xfs_buftarg_trip_write(btp);
> >  		return 0;
> > +	}
> >  
> >  	zsize = min(BDSTRAT_SIZE, BBTOB(len));
> >  	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
> > @@ -105,6 +107,7 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
> >  				progname, __FUNCTION__);
> >  			exit(1);
> >  		}
> > +		xfs_buftarg_trip_write(btp);
> >  		offset += bytes;
> >  	}
> >  	free(z);
> > @@ -860,6 +863,7 @@ libxfs_bwrite(
> >  	} else {
> >  		bp->b_flags |= LIBXFS_B_UPTODATE;
> >  		bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED);
> > +		xfs_buftarg_trip_write(bp->b_target);
> >  	}
> >  	return bp->b_error;
> >  }
> > 
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem
  2021-02-16 11:55   ` Brian Foster
@ 2021-02-18  4:45     ` Darrick J. Wong
  2021-02-18 12:59       ` Brian Foster
  0 siblings, 1 reply; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-18  4:45 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Tue, Feb 16, 2021 at 06:55:34AM -0500, Brian Foster wrote:
> On Fri, Feb 12, 2021 at 09:46:50PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Add a hook to the buffer cache so that xfs_repair can intercept the
> > first write to a V5 filesystem to set the NEEDSREPAIR flag.  In the
> > event that xfs_repair dirties the filesystem and goes down, this ensures
> > that the sysadmin will have to re-start repair before mounting.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  include/xfs_mount.h |    4 ++
> >  libxfs/rdwr.c       |    4 ++
> >  repair/xfs_repair.c |   95 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 103 insertions(+)
> > 
> > 
> ...
> > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > index 90d1a95a..12e319ae 100644
> > --- a/repair/xfs_repair.c
> > +++ b/repair/xfs_repair.c
> > @@ -720,6 +720,8 @@ clear_needsrepair(
> >  	struct xfs_buf		*bp;
> >  	int			error;
> >  
> > +	mp->m_buf_writeback_fn = NULL;
> > +
> 
> Slight impedence mismatch in that we set the callback unconditionally
> (assuming crc=1) but we only get to this call if needsrepair is set.

<nod> I'll remove that.

> >  	/*
> >  	 * If we're going to clear NEEDSREPAIR, we need to make absolutely sure
> >  	 * that everything is ok with the ondisk filesystem.  Make sure any
> > @@ -751,6 +753,95 @@ clear_needsrepair(
> >  		libxfs_buf_relse(bp);
> >  }
> >  
> > +static void
> > +update_sb_crc_only(
> > +	struct xfs_buf		*bp)
> > +{
> > +	xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
> > +}
> > +
> > +/* Forcibly write the primary superblock with the NEEDSREPAIR flag set. */
> > +static void
> > +force_needsrepair(
> > +	struct xfs_mount	*mp)
> > +{
> > +	struct xfs_buf_ops	fake_ops;
> > +	struct xfs_buf		*bp;
> > +	int			error;
> > +
> > +	if (!xfs_sb_version_hascrc(&mp->m_sb) ||
> > +	    xfs_sb_version_needsrepair(&mp->m_sb))
> > +		return;
> > +
> > +	bp = libxfs_getsb(mp);
> > +	if (!bp || bp->b_error) {
> > +		do_log(
> > +	_("couldn't get superblock to set needsrepair, err=%d\n"),
> > +				bp ? bp->b_error : ENOMEM);
> > +		return;
> > +	} else {
> > +		/*
> > +		 * It's possible that we need to set NEEDSREPAIR before we've
> > +		 * had a chance to fix the summary counters in the primary sb.
> > +		 * With the exception of those counters, phase 1 already
> > +		 * ensured that the geometry makes sense.
> > +		 *
> > +		 * Bad summary counters in the primary super can cause the
> > +		 * write verifier to fail, so substitute a dummy that only sets
> > +		 * the CRC.  In the event of a crash, NEEDSREPAIR will prevent
> > +		 * the kernel from mounting our potentially damaged filesystem
> > +		 * until repair is run again, so it's ok to bypass the usual
> > +		 * verification in this one case.
> > +		 */
> > +		fake_ops = xfs_sb_buf_ops; /* struct copy */
> > +		fake_ops.verify_write = update_sb_crc_only;
> > +
> > +		mp->m_sb.sb_features_incompat |=
> > +				XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > +		libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
> > +
> > +		/* Force the primary super to disk immediately. */
> > +		bp->b_ops = &fake_ops;
> > +		error = -libxfs_bwrite(bp);
> 
> This looks like it calls back into the writeback hook before it's been
> cleared.

Yes, it does...

> I'm guessing the xfs_sb_version_needsrepair() check above cuts
> off any recursion issues,

...but as soon as we end up in the writeback hook, the b_ops and
XFS_SB_DADDR checks cause the hook to return without doing anything.

> but it might be cleaner to clear the callback pointer first.

We need to block all other writers until /after/ we've written the
primary super, which means that we can't clear the callback until some
time after the bwrite.

> 
> > +		bp->b_ops = &xfs_sb_buf_ops;
> > +		if (error)
> > +			do_log(_("couldn't force needsrepair, err=%d\n"), error);
> > +	}
> > +	if (bp)
> > +		libxfs_buf_relse(bp);
> 
> Doesn't appear we can get here with bp == NULL. Otherwise the rest looks
> reasonable to me.

Yep, will fix.

--D

> Brian
> 
> > +}
> > +
> > +/* Intercept the first write to the filesystem so we can set NEEDSREPAIR. */
> > +static void
> > +repair_capture_writeback(
> > +	struct xfs_buf		*bp)
> > +{
> > +	struct xfs_mount	*mp = bp->b_mount;
> > +	static pthread_mutex_t	wb_mutex = PTHREAD_MUTEX_INITIALIZER;
> > +
> > +	/* Higher level code modifying a superblock must set NEEDSREPAIR. */
> > +	if (bp->b_ops == &xfs_sb_buf_ops || bp->b_bn == XFS_SB_DADDR)
> > +		return;
> > +
> > +	pthread_mutex_lock(&wb_mutex);
> > +
> > +	/*
> > +	 * If someone else already dropped the hook, then needsrepair has
> > +	 * already been set on the filesystem and we can unlock.
> > +	 */
> > +	if (mp->m_buf_writeback_fn != repair_capture_writeback)
> > +		goto unlock;
> > +
> > +	/*
> > +	 * If we get here, the buffer being written is not a superblock, and
> > +	 * needsrepair needs to be set.
> > +	 */
> > +	force_needsrepair(mp);
> > +	mp->m_buf_writeback_fn = NULL;
> > +unlock:
> > +	pthread_mutex_unlock(&wb_mutex);
> > +}
> > +
> >  int
> >  main(int argc, char **argv)
> >  {
> > @@ -847,6 +938,10 @@ main(int argc, char **argv)
> >  	if (verbose > 2)
> >  		mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
> >  
> > +	/* Capture the first writeback so that we can set needsrepair. */
> > +	if (xfs_sb_version_hascrc(&mp->m_sb))
> > +		mp->m_buf_writeback_fn = repair_capture_writeback;
> > +
> >  	/*
> >  	 * set XFS-independent status vars from the mount/sb structure
> >  	 */
> > 
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] xfs_repair: add post-phase error injection points
  2021-02-16 11:58   ` Brian Foster
@ 2021-02-18  4:47     ` Darrick J. Wong
  2021-02-18 13:02       ` Brian Foster
  0 siblings, 1 reply; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-18  4:47 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Tue, Feb 16, 2021 at 06:58:39AM -0500, Brian Foster wrote:
> On Fri, Feb 12, 2021 at 09:47:01PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Create an error injection point so that we can simulate repair failing
> > after a certain phase.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  repair/globals.c    |    3 +++
> >  repair/globals.h    |    3 +++
> >  repair/progress.c   |    3 +++
> >  repair/xfs_repair.c |    4 ++++
> >  4 files changed, 13 insertions(+)
> > 
> > 
> ...
> > diff --git a/repair/progress.c b/repair/progress.c
> > index e5a9c1ef..5bbe58ec 100644
> > --- a/repair/progress.c
> > +++ b/repair/progress.c
> > @@ -410,6 +410,9 @@ timestamp(int end, int phase, char *buf)
> >  		current_phase = phase;
> >  	}
> >  
> > +	if (fail_after_phase && phase == fail_after_phase)
> > +		kill(getpid(), SIGKILL);
> > +
> 
> It seems a little hacky to bury this in timestamp(). Perhaps we should
> at least check for end == PHASE_END (even though PHASE_START is
> currently only used in one place). Otherwise seems reasonable..

Yeah, I don't know of a better place to put it -- adding another call
after every timestamp() just seems like clutter.

Will fix it to check for PHASE_END, though.

Thanks for reading. :)

--D

> Brian
> 
> >  	if (buf) {
> >  		tmp = localtime((const time_t *)&now);
> >  		sprintf(buf, _("%02d:%02d:%02d"), tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
> > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > index 12e319ae..6b60b8f4 100644
> > --- a/repair/xfs_repair.c
> > +++ b/repair/xfs_repair.c
> > @@ -362,6 +362,10 @@ process_args(int argc, char **argv)
> >  
> >  	if (report_corrected && no_modify)
> >  		usage();
> > +
> > +	p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
> > +	if (p)
> > +		fail_after_phase = (int)strtol(p, NULL, 0);
> >  }
> >  
> >  void __attribute__((noreturn))
> > 
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem
  2021-02-18  4:45     ` Darrick J. Wong
@ 2021-02-18 12:59       ` Brian Foster
  2021-02-18 17:07         ` Darrick J. Wong
  0 siblings, 1 reply; 16+ messages in thread
From: Brian Foster @ 2021-02-18 12:59 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Wed, Feb 17, 2021 at 08:45:22PM -0800, Darrick J. Wong wrote:
> On Tue, Feb 16, 2021 at 06:55:34AM -0500, Brian Foster wrote:
> > On Fri, Feb 12, 2021 at 09:46:50PM -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <djwong@kernel.org>
> > > 
> > > Add a hook to the buffer cache so that xfs_repair can intercept the
> > > first write to a V5 filesystem to set the NEEDSREPAIR flag.  In the
> > > event that xfs_repair dirties the filesystem and goes down, this ensures
> > > that the sysadmin will have to re-start repair before mounting.
> > > 
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > >  include/xfs_mount.h |    4 ++
> > >  libxfs/rdwr.c       |    4 ++
> > >  repair/xfs_repair.c |   95 +++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  3 files changed, 103 insertions(+)
> > > 
> > > 
> > ...
> > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > index 90d1a95a..12e319ae 100644
> > > --- a/repair/xfs_repair.c
> > > +++ b/repair/xfs_repair.c
...
> > > @@ -751,6 +753,95 @@ clear_needsrepair(
> > >  		libxfs_buf_relse(bp);
> > >  }
> > >  
> > > +static void
> > > +update_sb_crc_only(
> > > +	struct xfs_buf		*bp)
> > > +{
> > > +	xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
> > > +}
> > > +
> > > +/* Forcibly write the primary superblock with the NEEDSREPAIR flag set. */
> > > +static void
> > > +force_needsrepair(
> > > +	struct xfs_mount	*mp)
> > > +{
> > > +	struct xfs_buf_ops	fake_ops;
> > > +	struct xfs_buf		*bp;
> > > +	int			error;
> > > +
> > > +	if (!xfs_sb_version_hascrc(&mp->m_sb) ||
> > > +	    xfs_sb_version_needsrepair(&mp->m_sb))
> > > +		return;
> > > +
> > > +	bp = libxfs_getsb(mp);
> > > +	if (!bp || bp->b_error) {
> > > +		do_log(
> > > +	_("couldn't get superblock to set needsrepair, err=%d\n"),
> > > +				bp ? bp->b_error : ENOMEM);
> > > +		return;
> > > +	} else {
> > > +		/*
> > > +		 * It's possible that we need to set NEEDSREPAIR before we've
> > > +		 * had a chance to fix the summary counters in the primary sb.
> > > +		 * With the exception of those counters, phase 1 already
> > > +		 * ensured that the geometry makes sense.
> > > +		 *
> > > +		 * Bad summary counters in the primary super can cause the
> > > +		 * write verifier to fail, so substitute a dummy that only sets
> > > +		 * the CRC.  In the event of a crash, NEEDSREPAIR will prevent
> > > +		 * the kernel from mounting our potentially damaged filesystem
> > > +		 * until repair is run again, so it's ok to bypass the usual
> > > +		 * verification in this one case.
> > > +		 */
> > > +		fake_ops = xfs_sb_buf_ops; /* struct copy */
> > > +		fake_ops.verify_write = update_sb_crc_only;
> > > +
> > > +		mp->m_sb.sb_features_incompat |=
> > > +				XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > > +		libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
> > > +
> > > +		/* Force the primary super to disk immediately. */
> > > +		bp->b_ops = &fake_ops;
> > > +		error = -libxfs_bwrite(bp);
> > 
> > This looks like it calls back into the writeback hook before it's been
> > cleared.
> 
> Yes, it does...
> 
> > I'm guessing the xfs_sb_version_needsrepair() check above cuts
> > off any recursion issues,
> 
> ...but as soon as we end up in the writeback hook, the b_ops and
> XFS_SB_DADDR checks cause the hook to return without doing anything.
> 
> > but it might be cleaner to clear the callback pointer first.
> 
> We need to block all other writers until /after/ we've written the
> primary super, which means that we can't clear the callback until some
> time after the bwrite.
> 

Ok, care to document how this is intended to work with a comment?

Brian

> > 
> > > +		bp->b_ops = &xfs_sb_buf_ops;
> > > +		if (error)
> > > +			do_log(_("couldn't force needsrepair, err=%d\n"), error);
> > > +	}
> > > +	if (bp)
> > > +		libxfs_buf_relse(bp);
> > 
> > Doesn't appear we can get here with bp == NULL. Otherwise the rest looks
> > reasonable to me.
> 
> Yep, will fix.
> 
> --D
> 
> > Brian
> > 
> > > +}
> > > +
> > > +/* Intercept the first write to the filesystem so we can set NEEDSREPAIR. */
> > > +static void
> > > +repair_capture_writeback(
> > > +	struct xfs_buf		*bp)
> > > +{
> > > +	struct xfs_mount	*mp = bp->b_mount;
> > > +	static pthread_mutex_t	wb_mutex = PTHREAD_MUTEX_INITIALIZER;
> > > +
> > > +	/* Higher level code modifying a superblock must set NEEDSREPAIR. */
> > > +	if (bp->b_ops == &xfs_sb_buf_ops || bp->b_bn == XFS_SB_DADDR)
> > > +		return;
> > > +
> > > +	pthread_mutex_lock(&wb_mutex);
> > > +
> > > +	/*
> > > +	 * If someone else already dropped the hook, then needsrepair has
> > > +	 * already been set on the filesystem and we can unlock.
> > > +	 */
> > > +	if (mp->m_buf_writeback_fn != repair_capture_writeback)
> > > +		goto unlock;
> > > +
> > > +	/*
> > > +	 * If we get here, the buffer being written is not a superblock, and
> > > +	 * needsrepair needs to be set.
> > > +	 */
> > > +	force_needsrepair(mp);
> > > +	mp->m_buf_writeback_fn = NULL;
> > > +unlock:
> > > +	pthread_mutex_unlock(&wb_mutex);
> > > +}
> > > +
> > >  int
> > >  main(int argc, char **argv)
> > >  {
> > > @@ -847,6 +938,10 @@ main(int argc, char **argv)
> > >  	if (verbose > 2)
> > >  		mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
> > >  
> > > +	/* Capture the first writeback so that we can set needsrepair. */
> > > +	if (xfs_sb_version_hascrc(&mp->m_sb))
> > > +		mp->m_buf_writeback_fn = repair_capture_writeback;
> > > +
> > >  	/*
> > >  	 * set XFS-independent status vars from the mount/sb structure
> > >  	 */
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] libxfs: simulate system failure after a certain number of writes
  2021-02-18  4:36     ` Darrick J. Wong
@ 2021-02-18 13:02       ` Brian Foster
  2021-02-18 17:42         ` Darrick J. Wong
  0 siblings, 1 reply; 16+ messages in thread
From: Brian Foster @ 2021-02-18 13:02 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Wed, Feb 17, 2021 at 08:36:20PM -0800, Darrick J. Wong wrote:
> On Tue, Feb 16, 2021 at 06:56:45AM -0500, Brian Foster wrote:
> > On Fri, Feb 12, 2021 at 09:46:56PM -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <djwong@kernel.org>
> > > 
> > > Add an error injection knob so that we can simulate system failure after
> > > a certain number of disk writes.  This knob is being added so that we
> > > can check repair's behavior after an arbitrary number of tests.
> > > 
> > > Set LIBXFS_DEBUG_WRITE_CRASH={ddev,logdev,rtdev}=nn in the environment
> > > to make libxfs SIGKILL itself after nn writes to the data, log, or rt
> > > devices.  Note that this only applies to xfs_buf writes and zero_range.
> > > 
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > >  libxfs/init.c      |   68 +++++++++++++++++++++++++++++++++++++++++++++++++---
> > >  libxfs/libxfs_io.h |   19 +++++++++++++++
> > >  libxfs/rdwr.c      |    6 ++++-
> > >  3 files changed, 88 insertions(+), 5 deletions(-)
> > > 
> > > 
> > > diff --git a/libxfs/init.c b/libxfs/init.c
> > > index 8a8ce3c4..1ec83791 100644
> > > --- a/libxfs/init.c
> > > +++ b/libxfs/init.c
> > ...
> > > @@ -614,6 +634,46 @@ libxfs_buftarg_init(
> > >  	dev_t			logdev,
> > >  	dev_t			rtdev)
> > >  {
> > > +	char			*p = getenv("LIBXFS_DEBUG_WRITE_CRASH");
> > > +	unsigned long		dfail = 0, lfail = 0, rfail = 0;
> > 
> > Was there a reason for using an environment variable now rather than the
> > original command line option?
> 
> Well, you said you wanted a generic write error injection hook for
> libxfs, and this is the simplest way to add that, given that libraries
> don't have a direct means to parse argc and argv.
> 

I think you're misinterpreting my previous feedback. ;) I thought the
injection mechanism was too closely tied to an implementation detail
(i.e. "fail after updating needsrepair bit") of the application.
Instead, I preferred a more generic mechanism (the "fail after so many
I/Os," "fail after phase N" approaches in these patches) that covers the
original use case. That broadens the potential test coverage and
usefulness of the mechanism.

> I mean... this /could/ take the form of an exposed library function that
> xfs utilities could opt into their own getopt loops, but that's even
> /more/ infrastructure code that I'd have to write.
> 

In this case I was just curious why the interface was changed from the
previous approach. ISTM it didn't necessarily have to, but I'm not
concerned about it either way.

...
> > > diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
> > > index c80e2d59..85485257 100644
> > > --- a/libxfs/libxfs_io.h
> > > +++ b/libxfs/libxfs_io.h
> > ...
> > > @@ -30,6 +32,23 @@ struct xfs_buftarg {
> > >  #define XFS_BUFTARG_LOST_WRITE		(1 << 0)
> > >  /* A dirty buffer failed the write verifier. */
> > >  #define XFS_BUFTARG_CORRUPT_WRITE	(1 << 1)
> > > +/* Simulate failure after a certain number of writes. */
> > > +#define XFS_BUFTARG_INJECT_WRITE_FAIL	(1 << 2)
> > > +
> > > +/* Simulate the system crashing after a write. */
> > > +static inline void
> > > +xfs_buftarg_trip_write(
> > > +	struct xfs_buftarg	*btp)
> > > +{
> > > +	if (!(btp->flags & XFS_BUFTARG_INJECT_WRITE_FAIL))
> > > +		return;
> > > +
> > > +	pthread_mutex_lock(&btp->lock);
> > > +	btp->writes_left--;
> > > +	if (!btp->writes_left)
> > > +		kill(getpid(), SIGKILL);
> > 
> > Can we just exit()?
> > 
> > (Same questions for the next patch..)
> 
> The goal of this generic write error injection framework is to simulate
> total system crashes immediately after a write.
> 
> SIGKILL and exit are not the same, because atexit handlers don't run if
> the process forcibly kills itself.
> 

Can you document this somewhere please?

Brian

> --D
> 
> > 
> > Brian
> > 
> > > +	pthread_mutex_unlock(&btp->lock);
> > > +}
> > >  
> > >  extern void	libxfs_buftarg_init(struct xfs_mount *mp, dev_t ddev,
> > >  				    dev_t logdev, dev_t rtdev);
> > > diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> > > index ca272387..fd456d6b 100644
> > > --- a/libxfs/rdwr.c
> > > +++ b/libxfs/rdwr.c
> > > @@ -74,8 +74,10 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
> > >  	/* try to use special zeroing methods, fall back to writes if needed */
> > >  	len_bytes = LIBXFS_BBTOOFF64(len);
> > >  	error = platform_zero_range(fd, start_offset, len_bytes);
> > > -	if (!error)
> > > +	if (!error) {
> > > +		xfs_buftarg_trip_write(btp);
> > >  		return 0;
> > > +	}
> > >  
> > >  	zsize = min(BDSTRAT_SIZE, BBTOB(len));
> > >  	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
> > > @@ -105,6 +107,7 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
> > >  				progname, __FUNCTION__);
> > >  			exit(1);
> > >  		}
> > > +		xfs_buftarg_trip_write(btp);
> > >  		offset += bytes;
> > >  	}
> > >  	free(z);
> > > @@ -860,6 +863,7 @@ libxfs_bwrite(
> > >  	} else {
> > >  		bp->b_flags |= LIBXFS_B_UPTODATE;
> > >  		bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED);
> > > +		xfs_buftarg_trip_write(bp->b_target);
> > >  	}
> > >  	return bp->b_error;
> > >  }
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] xfs_repair: add post-phase error injection points
  2021-02-18  4:47     ` Darrick J. Wong
@ 2021-02-18 13:02       ` Brian Foster
  2021-02-18 18:01         ` Darrick J. Wong
  0 siblings, 1 reply; 16+ messages in thread
From: Brian Foster @ 2021-02-18 13:02 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Wed, Feb 17, 2021 at 08:47:29PM -0800, Darrick J. Wong wrote:
> On Tue, Feb 16, 2021 at 06:58:39AM -0500, Brian Foster wrote:
> > On Fri, Feb 12, 2021 at 09:47:01PM -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <djwong@kernel.org>
> > > 
> > > Create an error injection point so that we can simulate repair failing
> > > after a certain phase.
> > > 
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > >  repair/globals.c    |    3 +++
> > >  repair/globals.h    |    3 +++
> > >  repair/progress.c   |    3 +++
> > >  repair/xfs_repair.c |    4 ++++
> > >  4 files changed, 13 insertions(+)
> > > 
> > > 
> > ...
> > > diff --git a/repair/progress.c b/repair/progress.c
> > > index e5a9c1ef..5bbe58ec 100644
> > > --- a/repair/progress.c
> > > +++ b/repair/progress.c
> > > @@ -410,6 +410,9 @@ timestamp(int end, int phase, char *buf)
> > >  		current_phase = phase;
> > >  	}
> > >  
> > > +	if (fail_after_phase && phase == fail_after_phase)
> > > +		kill(getpid(), SIGKILL);
> > > +
> > 
> > It seems a little hacky to bury this in timestamp(). Perhaps we should
> > at least check for end == PHASE_END (even though PHASE_START is
> > currently only used in one place). Otherwise seems reasonable..
> 
> Yeah, I don't know of a better place to put it -- adding another call
> after every timestamp() just seems like clutter.
> 

I was thinking that factoring timestamp() into a new post-phase helper
seemed a relatively simple cleanup.

Brian

> Will fix it to check for PHASE_END, though.
> 
> Thanks for reading. :)
> 
> --D
> 
> > Brian
> > 
> > >  	if (buf) {
> > >  		tmp = localtime((const time_t *)&now);
> > >  		sprintf(buf, _("%02d:%02d:%02d"), tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
> > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > index 12e319ae..6b60b8f4 100644
> > > --- a/repair/xfs_repair.c
> > > +++ b/repair/xfs_repair.c
> > > @@ -362,6 +362,10 @@ process_args(int argc, char **argv)
> > >  
> > >  	if (report_corrected && no_modify)
> > >  		usage();
> > > +
> > > +	p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
> > > +	if (p)
> > > +		fail_after_phase = (int)strtol(p, NULL, 0);
> > >  }
> > >  
> > >  void __attribute__((noreturn))
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem
  2021-02-18 12:59       ` Brian Foster
@ 2021-02-18 17:07         ` Darrick J. Wong
  0 siblings, 0 replies; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-18 17:07 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Thu, Feb 18, 2021 at 07:59:27AM -0500, Brian Foster wrote:
> On Wed, Feb 17, 2021 at 08:45:22PM -0800, Darrick J. Wong wrote:
> > On Tue, Feb 16, 2021 at 06:55:34AM -0500, Brian Foster wrote:
> > > On Fri, Feb 12, 2021 at 09:46:50PM -0800, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <djwong@kernel.org>
> > > > 
> > > > Add a hook to the buffer cache so that xfs_repair can intercept the
> > > > first write to a V5 filesystem to set the NEEDSREPAIR flag.  In the
> > > > event that xfs_repair dirties the filesystem and goes down, this ensures
> > > > that the sysadmin will have to re-start repair before mounting.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > ---
> > > >  include/xfs_mount.h |    4 ++
> > > >  libxfs/rdwr.c       |    4 ++
> > > >  repair/xfs_repair.c |   95 +++++++++++++++++++++++++++++++++++++++++++++++++++
> > > >  3 files changed, 103 insertions(+)
> > > > 
> > > > 
> > > ...
> > > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > > index 90d1a95a..12e319ae 100644
> > > > --- a/repair/xfs_repair.c
> > > > +++ b/repair/xfs_repair.c
> ...
> > > > @@ -751,6 +753,95 @@ clear_needsrepair(
> > > >  		libxfs_buf_relse(bp);
> > > >  }
> > > >  
> > > > +static void
> > > > +update_sb_crc_only(
> > > > +	struct xfs_buf		*bp)
> > > > +{
> > > > +	xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
> > > > +}
> > > > +
> > > > +/* Forcibly write the primary superblock with the NEEDSREPAIR flag set. */
> > > > +static void
> > > > +force_needsrepair(
> > > > +	struct xfs_mount	*mp)
> > > > +{
> > > > +	struct xfs_buf_ops	fake_ops;
> > > > +	struct xfs_buf		*bp;
> > > > +	int			error;
> > > > +
> > > > +	if (!xfs_sb_version_hascrc(&mp->m_sb) ||
> > > > +	    xfs_sb_version_needsrepair(&mp->m_sb))
> > > > +		return;
> > > > +
> > > > +	bp = libxfs_getsb(mp);
> > > > +	if (!bp || bp->b_error) {
> > > > +		do_log(
> > > > +	_("couldn't get superblock to set needsrepair, err=%d\n"),
> > > > +				bp ? bp->b_error : ENOMEM);
> > > > +		return;
> > > > +	} else {
> > > > +		/*
> > > > +		 * It's possible that we need to set NEEDSREPAIR before we've
> > > > +		 * had a chance to fix the summary counters in the primary sb.
> > > > +		 * With the exception of those counters, phase 1 already
> > > > +		 * ensured that the geometry makes sense.
> > > > +		 *
> > > > +		 * Bad summary counters in the primary super can cause the
> > > > +		 * write verifier to fail, so substitute a dummy that only sets
> > > > +		 * the CRC.  In the event of a crash, NEEDSREPAIR will prevent
> > > > +		 * the kernel from mounting our potentially damaged filesystem
> > > > +		 * until repair is run again, so it's ok to bypass the usual
> > > > +		 * verification in this one case.
> > > > +		 */
> > > > +		fake_ops = xfs_sb_buf_ops; /* struct copy */
> > > > +		fake_ops.verify_write = update_sb_crc_only;
> > > > +
> > > > +		mp->m_sb.sb_features_incompat |=
> > > > +				XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> > > > +		libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
> > > > +
> > > > +		/* Force the primary super to disk immediately. */
> > > > +		bp->b_ops = &fake_ops;
> > > > +		error = -libxfs_bwrite(bp);
> > > 
> > > This looks like it calls back into the writeback hook before it's been
> > > cleared.
> > 
> > Yes, it does...
> > 
> > > I'm guessing the xfs_sb_version_needsrepair() check above cuts
> > > off any recursion issues,
> > 
> > ...but as soon as we end up in the writeback hook, the b_ops and
> > XFS_SB_DADDR checks cause the hook to return without doing anything.
> > 
> > > but it might be cleaner to clear the callback pointer first.
> > 
> > We need to block all other writers until /after/ we've written the
> > primary super, which means that we can't clear the callback until some
> > time after the bwrite.
> > 
> 
> Ok, care to document how this is intended to work with a comment?

Will do, though I'll put the comments about the hook in the actual hook
function:

	/*
	 * This write hook ignores any buffer that looks like a
	 * superblock to avoid hook recursion when setting NEEDSREPAIR.
	 * Higher level code modifying an sb must control the flag
	 * manually.
	 */

	if (bp->b_ops == &xfs_sb_buf_ops || bp->b_bn == XFS_SB_DADDR)
		return;

	pthread_mutex_lock(&wb_mutex);

	/*
	 * If someone else already dropped the hook, then needsrepair
	 * has already been set on the filesystem and we can unlock.
	 */
	if (mp->m_buf_writeback_fn != repair_capture_writeback)
		goto unlock;

	/*
	 * If we get here, the buffer being written is not a superblock,
	 * and needsrepair needs to be set.  The hook is kept in place
	 * to plug all other writes until the sb write finishes.
	 */
	force_needsrepair(mp);
	mp->m_buf_writeback_fn = NULL;

--D

> Brian
> 
> > > 
> > > > +		bp->b_ops = &xfs_sb_buf_ops;
> > > > +		if (error)
> > > > +			do_log(_("couldn't force needsrepair, err=%d\n"), error);
> > > > +	}
> > > > +	if (bp)
> > > > +		libxfs_buf_relse(bp);
> > > 
> > > Doesn't appear we can get here with bp == NULL. Otherwise the rest looks
> > > reasonable to me.
> > 
> > Yep, will fix.
> > 
> > --D
> > 
> > > Brian
> > > 
> > > > +}
> > > > +
> > > > +/* Intercept the first write to the filesystem so we can set NEEDSREPAIR. */
> > > > +static void
> > > > +repair_capture_writeback(
> > > > +	struct xfs_buf		*bp)
> > > > +{
> > > > +	struct xfs_mount	*mp = bp->b_mount;
> > > > +	static pthread_mutex_t	wb_mutex = PTHREAD_MUTEX_INITIALIZER;
> > > > +
> > > > +	/* Higher level code modifying a superblock must set NEEDSREPAIR. */
> > > > +	if (bp->b_ops == &xfs_sb_buf_ops || bp->b_bn == XFS_SB_DADDR)
> > > > +		return;
> > > > +
> > > > +	pthread_mutex_lock(&wb_mutex);
> > > > +
> > > > +	/*
> > > > +	 * If someone else already dropped the hook, then needsrepair has
> > > > +	 * already been set on the filesystem and we can unlock.
> > > > +	 */
> > > > +	if (mp->m_buf_writeback_fn != repair_capture_writeback)
> > > > +		goto unlock;
> > > > +
> > > > +	/*
> > > > +	 * If we get here, the buffer being written is not a superblock, and
> > > > +	 * needsrepair needs to be set.
> > > > +	 */
> > > > +	force_needsrepair(mp);
> > > > +	mp->m_buf_writeback_fn = NULL;
> > > > +unlock:
> > > > +	pthread_mutex_unlock(&wb_mutex);
> > > > +}
> > > > +
> > > >  int
> > > >  main(int argc, char **argv)
> > > >  {
> > > > @@ -847,6 +938,10 @@ main(int argc, char **argv)
> > > >  	if (verbose > 2)
> > > >  		mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
> > > >  
> > > > +	/* Capture the first writeback so that we can set needsrepair. */
> > > > +	if (xfs_sb_version_hascrc(&mp->m_sb))
> > > > +		mp->m_buf_writeback_fn = repair_capture_writeback;
> > > > +
> > > >  	/*
> > > >  	 * set XFS-independent status vars from the mount/sb structure
> > > >  	 */
> > > > 
> > > 
> > 
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] libxfs: simulate system failure after a certain number of writes
  2021-02-18 13:02       ` Brian Foster
@ 2021-02-18 17:42         ` Darrick J. Wong
  0 siblings, 0 replies; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-18 17:42 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Thu, Feb 18, 2021 at 08:02:17AM -0500, Brian Foster wrote:
> On Wed, Feb 17, 2021 at 08:36:20PM -0800, Darrick J. Wong wrote:
> > On Tue, Feb 16, 2021 at 06:56:45AM -0500, Brian Foster wrote:
> > > On Fri, Feb 12, 2021 at 09:46:56PM -0800, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <djwong@kernel.org>
> > > > 
> > > > Add an error injection knob so that we can simulate system failure after
> > > > a certain number of disk writes.  This knob is being added so that we
> > > > can check repair's behavior after an arbitrary number of tests.
> > > > 
> > > > Set LIBXFS_DEBUG_WRITE_CRASH={ddev,logdev,rtdev}=nn in the environment
> > > > to make libxfs SIGKILL itself after nn writes to the data, log, or rt
> > > > devices.  Note that this only applies to xfs_buf writes and zero_range.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > ---
> > > >  libxfs/init.c      |   68 +++++++++++++++++++++++++++++++++++++++++++++++++---
> > > >  libxfs/libxfs_io.h |   19 +++++++++++++++
> > > >  libxfs/rdwr.c      |    6 ++++-
> > > >  3 files changed, 88 insertions(+), 5 deletions(-)
> > > > 
> > > > 
> > > > diff --git a/libxfs/init.c b/libxfs/init.c
> > > > index 8a8ce3c4..1ec83791 100644
> > > > --- a/libxfs/init.c
> > > > +++ b/libxfs/init.c
> > > ...
> > > > @@ -614,6 +634,46 @@ libxfs_buftarg_init(
> > > >  	dev_t			logdev,
> > > >  	dev_t			rtdev)
> > > >  {
> > > > +	char			*p = getenv("LIBXFS_DEBUG_WRITE_CRASH");
> > > > +	unsigned long		dfail = 0, lfail = 0, rfail = 0;
> > > 
> > > Was there a reason for using an environment variable now rather than the
> > > original command line option?
> > 
> > Well, you said you wanted a generic write error injection hook for
> > libxfs, and this is the simplest way to add that, given that libraries
> > don't have a direct means to parse argc and argv.
> > 
> 
> I think you're misinterpreting my previous feedback. ;) I thought the
> injection mechanism was too closely tied to an implementation detail
> (i.e. "fail after updating needsrepair bit") of the application.
> Instead, I preferred a more generic mechanism (the "fail after so many
> I/Os," "fail after phase N" approaches in these patches) that covers the
> original use case. That broadens the potential test coverage and
> usefulness of the mechanism.

Agreed.  Admittedly, the test case I wrote for it that kills repair
after NR writes (where NR steadily increases) has opened my eyes to how
... stunningly awful xfs_repair (and e2fsck) can be.

(As in, you can *very easily* snatch death from the jaws of victory if
all you wanted was to fix a minor bitflip somewhere /and/ repair
dies...)

> > I mean... this /could/ take the form of an exposed library function that
> > xfs utilities could opt into their own getopt loops, but that's even
> > /more/ infrastructure code that I'd have to write.
> > 
> 
> In this case I was just curious why the interface was changed from the
> previous approach. ISTM it didn't necessarily have to, but I'm not
> concerned about it either way.

<nod>

> ...
> > > > diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
> > > > index c80e2d59..85485257 100644
> > > > --- a/libxfs/libxfs_io.h
> > > > +++ b/libxfs/libxfs_io.h
> > > ...
> > > > @@ -30,6 +32,23 @@ struct xfs_buftarg {
> > > >  #define XFS_BUFTARG_LOST_WRITE		(1 << 0)
> > > >  /* A dirty buffer failed the write verifier. */
> > > >  #define XFS_BUFTARG_CORRUPT_WRITE	(1 << 1)
> > > > +/* Simulate failure after a certain number of writes. */
> > > > +#define XFS_BUFTARG_INJECT_WRITE_FAIL	(1 << 2)
> > > > +
> > > > +/* Simulate the system crashing after a write. */
> > > > +static inline void
> > > > +xfs_buftarg_trip_write(
> > > > +	struct xfs_buftarg	*btp)
> > > > +{
> > > > +	if (!(btp->flags & XFS_BUFTARG_INJECT_WRITE_FAIL))
> > > > +		return;
> > > > +
> > > > +	pthread_mutex_lock(&btp->lock);
> > > > +	btp->writes_left--;
> > > > +	if (!btp->writes_left)
> > > > +		kill(getpid(), SIGKILL);
> > > 
> > > Can we just exit()?
> > > 
> > > (Same questions for the next patch..)
> > 
> > The goal of this generic write error injection framework is to simulate
> > total system crashes immediately after a write.
> > 
> > SIGKILL and exit are not the same, because atexit handlers don't run if
> > the process forcibly kills itself.
> > 
> 
> Can you document this somewhere please?

Will do.

--D

> Brian
> 
> > --D
> > 
> > > 
> > > Brian
> > > 
> > > > +	pthread_mutex_unlock(&btp->lock);
> > > > +}
> > > >  
> > > >  extern void	libxfs_buftarg_init(struct xfs_mount *mp, dev_t ddev,
> > > >  				    dev_t logdev, dev_t rtdev);
> > > > diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> > > > index ca272387..fd456d6b 100644
> > > > --- a/libxfs/rdwr.c
> > > > +++ b/libxfs/rdwr.c
> > > > @@ -74,8 +74,10 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
> > > >  	/* try to use special zeroing methods, fall back to writes if needed */
> > > >  	len_bytes = LIBXFS_BBTOOFF64(len);
> > > >  	error = platform_zero_range(fd, start_offset, len_bytes);
> > > > -	if (!error)
> > > > +	if (!error) {
> > > > +		xfs_buftarg_trip_write(btp);
> > > >  		return 0;
> > > > +	}
> > > >  
> > > >  	zsize = min(BDSTRAT_SIZE, BBTOB(len));
> > > >  	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
> > > > @@ -105,6 +107,7 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
> > > >  				progname, __FUNCTION__);
> > > >  			exit(1);
> > > >  		}
> > > > +		xfs_buftarg_trip_write(btp);
> > > >  		offset += bytes;
> > > >  	}
> > > >  	free(z);
> > > > @@ -860,6 +863,7 @@ libxfs_bwrite(
> > > >  	} else {
> > > >  		bp->b_flags |= LIBXFS_B_UPTODATE;
> > > >  		bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED);
> > > > +		xfs_buftarg_trip_write(bp->b_target);
> > > >  	}
> > > >  	return bp->b_error;
> > > >  }
> > > > 
> > > 
> > 
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] xfs_repair: add post-phase error injection points
  2021-02-18 13:02       ` Brian Foster
@ 2021-02-18 18:01         ` Darrick J. Wong
  0 siblings, 0 replies; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-18 18:01 UTC (permalink / raw)
  To: Brian Foster; +Cc: sandeen, linux-xfs

On Thu, Feb 18, 2021 at 08:02:28AM -0500, Brian Foster wrote:
> On Wed, Feb 17, 2021 at 08:47:29PM -0800, Darrick J. Wong wrote:
> > On Tue, Feb 16, 2021 at 06:58:39AM -0500, Brian Foster wrote:
> > > On Fri, Feb 12, 2021 at 09:47:01PM -0800, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <djwong@kernel.org>
> > > > 
> > > > Create an error injection point so that we can simulate repair failing
> > > > after a certain phase.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > ---
> > > >  repair/globals.c    |    3 +++
> > > >  repair/globals.h    |    3 +++
> > > >  repair/progress.c   |    3 +++
> > > >  repair/xfs_repair.c |    4 ++++
> > > >  4 files changed, 13 insertions(+)
> > > > 
> > > > 
> > > ...
> > > > diff --git a/repair/progress.c b/repair/progress.c
> > > > index e5a9c1ef..5bbe58ec 100644
> > > > --- a/repair/progress.c
> > > > +++ b/repair/progress.c
> > > > @@ -410,6 +410,9 @@ timestamp(int end, int phase, char *buf)
> > > >  		current_phase = phase;
> > > >  	}
> > > >  
> > > > +	if (fail_after_phase && phase == fail_after_phase)
> > > > +		kill(getpid(), SIGKILL);
> > > > +
> > > 
> > > It seems a little hacky to bury this in timestamp(). Perhaps we should
> > > at least check for end == PHASE_END (even though PHASE_START is
> > > currently only used in one place). Otherwise seems reasonable..
> > 
> > Yeah, I don't know of a better place to put it -- adding another call
> > after every timestamp() just seems like clutter.
> > 
> 
> I was thinking that factoring timestamp() into a new post-phase helper
> seemed a relatively simple cleanup.

Ok, will do.

--D

> Brian
> 
> > Will fix it to check for PHASE_END, though.
> > 
> > Thanks for reading. :)
> > 
> > --D
> > 
> > > Brian
> > > 
> > > >  	if (buf) {
> > > >  		tmp = localtime((const time_t *)&now);
> > > >  		sprintf(buf, _("%02d:%02d:%02d"), tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
> > > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > > > index 12e319ae..6b60b8f4 100644
> > > > --- a/repair/xfs_repair.c
> > > > +++ b/repair/xfs_repair.c
> > > > @@ -362,6 +362,10 @@ process_args(int argc, char **argv)
> > > >  
> > > >  	if (report_corrected && no_modify)
> > > >  		usage();
> > > > +
> > > > +	p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
> > > > +	if (p)
> > > > +		fail_after_phase = (int)strtol(p, NULL, 0);
> > > >  }
> > > >  
> > > >  void __attribute__((noreturn))
> > > > 
> > > 
> > 
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-02-18 18:48 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-13  5:46 [PATCHSET 0/3] xfs_repair: set needsrepair when dirtying filesystems Darrick J. Wong
2021-02-13  5:46 ` [PATCH 1/3] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem Darrick J. Wong
2021-02-16 11:55   ` Brian Foster
2021-02-18  4:45     ` Darrick J. Wong
2021-02-18 12:59       ` Brian Foster
2021-02-18 17:07         ` Darrick J. Wong
2021-02-13  5:46 ` [PATCH 2/3] libxfs: simulate system failure after a certain number of writes Darrick J. Wong
2021-02-16 11:56   ` Brian Foster
2021-02-18  4:36     ` Darrick J. Wong
2021-02-18 13:02       ` Brian Foster
2021-02-18 17:42         ` Darrick J. Wong
2021-02-13  5:47 ` [PATCH 3/3] xfs_repair: add post-phase error injection points Darrick J. Wong
2021-02-16 11:58   ` Brian Foster
2021-02-18  4:47     ` Darrick J. Wong
2021-02-18 13:02       ` Brian Foster
2021-02-18 18:01         ` Darrick J. Wong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.