* [PATCH 1/4] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem
2021-02-23 3:01 [PATCHSET v3 0/4] xfs_repair: set needsrepair when dirtying filesystems Darrick J. Wong
@ 2021-02-23 3:01 ` Darrick J. Wong
2021-02-24 5:39 ` Allison Henderson
2021-02-25 8:16 ` Christoph Hellwig
2021-02-23 3:01 ` [PATCH 2/4] libxfs: simulate system failure after a certain number of writes Darrick J. Wong
` (2 subsequent siblings)
3 siblings, 2 replies; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-23 3:01 UTC (permalink / raw)
To: sandeen, djwong; +Cc: Brian Foster, linux-xfs, bfoster
From: Darrick J. Wong <djwong@kernel.org>
Add a hook to the buffer cache so that xfs_repair can intercept the
first write to a V5 filesystem to set the NEEDSREPAIR flag. In the
event that xfs_repair dirties the filesystem and goes down, this ensures
that the sysadmin will have to re-start repair before mounting.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
include/xfs_mount.h | 4 ++
libxfs/rdwr.c | 4 ++
repair/xfs_repair.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 110 insertions(+)
diff --git a/include/xfs_mount.h b/include/xfs_mount.h
index 75230ca5..f93a9f11 100644
--- a/include/xfs_mount.h
+++ b/include/xfs_mount.h
@@ -11,6 +11,8 @@ struct xfs_inode;
struct xfs_buftarg;
struct xfs_da_geometry;
+typedef void (*buf_writeback_fn)(struct xfs_buf *bp);
+
/*
* Define a user-level mount structure with all we need
* in order to make use of the numerous XFS_* macros.
@@ -95,6 +97,8 @@ typedef struct xfs_mount {
int qi_dqperchunk;
} *m_quotainfo;
+ buf_writeback_fn m_buf_writeback_fn;
+
/*
* xlog is defined in libxlog and thus is not intialized by libxfs. This
* allows an application to initialize and store a reference to the log
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index ac783ce3..ca272387 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -812,6 +812,10 @@ libxfs_bwrite(
return bp->b_error;
}
+ /* Trigger the writeback hook if there is one. */
+ if (bp->b_mount->m_buf_writeback_fn)
+ bp->b_mount->m_buf_writeback_fn(bp);
+
/*
* clear any pre-existing error status on the buffer. This can occur if
* the buffer is corrupt on disk and the repair process doesn't clear
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index e2e99b21..03b7c242 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -751,6 +751,104 @@ clear_needsrepair(
libxfs_buf_relse(bp);
}
+static void
+update_sb_crc_only(
+ struct xfs_buf *bp)
+{
+ xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
+}
+
+/* Forcibly write the primary superblock with the NEEDSREPAIR flag set. */
+static void
+force_needsrepair(
+ struct xfs_mount *mp)
+{
+ struct xfs_buf_ops fake_ops;
+ struct xfs_buf *bp;
+ int error;
+
+ if (!xfs_sb_version_hascrc(&mp->m_sb) ||
+ xfs_sb_version_needsrepair(&mp->m_sb))
+ return;
+
+ bp = libxfs_getsb(mp);
+ if (!bp || bp->b_error) {
+ do_log(
+ _("couldn't get superblock to set needsrepair, err=%d\n"),
+ bp ? bp->b_error : ENOMEM);
+ } else {
+ /*
+ * It's possible that we need to set NEEDSREPAIR before we've
+ * had a chance to fix the summary counters in the primary sb.
+ * With the exception of those counters, phase 1 already
+ * ensured that the geometry makes sense.
+ *
+ * Bad summary counters in the primary super can cause the
+ * write verifier to fail, so substitute a dummy that only sets
+ * the CRC. In the event of a crash, NEEDSREPAIR will prevent
+ * the kernel from mounting our potentially damaged filesystem
+ * until repair is run again, so it's ok to bypass the usual
+ * verification in this one case.
+ */
+ fake_ops = xfs_sb_buf_ops; /* struct copy */
+ fake_ops.verify_write = update_sb_crc_only;
+
+ mp->m_sb.sb_features_incompat |=
+ XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
+ libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
+
+ /* Force the primary super to disk immediately. */
+ bp->b_ops = &fake_ops;
+ error = -libxfs_bwrite(bp);
+ bp->b_ops = &xfs_sb_buf_ops;
+ if (error)
+ do_log(_("couldn't force needsrepair, err=%d\n"), error);
+ }
+ if (bp)
+ libxfs_buf_relse(bp);
+}
+
+/*
+ * Intercept the first non-super write to the filesystem so we can set
+ * NEEDSREPAIR to protect the filesystem from mount in case of a crash.
+ */
+static void
+repair_capture_writeback(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_mount;
+ static pthread_mutex_t wb_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+ /*
+ * This write hook ignores any buffer that looks like a superblock to
+ * avoid hook recursion when setting NEEDSREPAIR. Higher level code
+ * modifying an sb must control the flag manually.
+ */
+ if (bp->b_ops == &xfs_sb_buf_ops || bp->b_bn == XFS_SB_DADDR)
+ return;
+
+ pthread_mutex_lock(&wb_mutex);
+
+ /*
+ * If someone else already dropped the hook, then needsrepair has
+ * already been set on the filesystem and we can unlock.
+ */
+ if (mp->m_buf_writeback_fn != repair_capture_writeback)
+ goto unlock;
+
+ /*
+ * If we get here, the buffer being written is not a superblock, and
+ * needsrepair needs to be set. The hook is kept in place to plug all
+ * other writes until the sb write finishes.
+ */
+ force_needsrepair(mp);
+
+ /* We only set needsrepair once, so clear the hook now. */
+ mp->m_buf_writeback_fn = NULL;
+unlock:
+ pthread_mutex_unlock(&wb_mutex);
+}
+
int
main(int argc, char **argv)
{
@@ -847,6 +945,10 @@ main(int argc, char **argv)
if (verbose > 2)
mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
+ /* Capture the first writeback so that we can set needsrepair. */
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ mp->m_buf_writeback_fn = repair_capture_writeback;
+
/*
* set XFS-independent status vars from the mount/sb structure
*/
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 1/4] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem
2021-02-23 3:01 ` [PATCH 1/4] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem Darrick J. Wong
@ 2021-02-24 5:39 ` Allison Henderson
2021-02-25 8:16 ` Christoph Hellwig
1 sibling, 0 replies; 16+ messages in thread
From: Allison Henderson @ 2021-02-24 5:39 UTC (permalink / raw)
To: Darrick J. Wong, sandeen; +Cc: Brian Foster, linux-xfs
On 2/22/21 8:01 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Add a hook to the buffer cache so that xfs_repair can intercept the
> first write to a V5 filesystem to set the NEEDSREPAIR flag. In the
> event that xfs_repair dirties the filesystem and goes down, this ensures
> that the sysadmin will have to re-start repair before mounting.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> Reviewed-by: Brian Foster <bfoster@redhat.com>
Ok, I think it looks good
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
> ---
> include/xfs_mount.h | 4 ++
> libxfs/rdwr.c | 4 ++
> repair/xfs_repair.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 110 insertions(+)
>
>
> diff --git a/include/xfs_mount.h b/include/xfs_mount.h
> index 75230ca5..f93a9f11 100644
> --- a/include/xfs_mount.h
> +++ b/include/xfs_mount.h
> @@ -11,6 +11,8 @@ struct xfs_inode;
> struct xfs_buftarg;
> struct xfs_da_geometry;
>
> +typedef void (*buf_writeback_fn)(struct xfs_buf *bp);
> +
> /*
> * Define a user-level mount structure with all we need
> * in order to make use of the numerous XFS_* macros.
> @@ -95,6 +97,8 @@ typedef struct xfs_mount {
> int qi_dqperchunk;
> } *m_quotainfo;
>
> + buf_writeback_fn m_buf_writeback_fn;
> +
> /*
> * xlog is defined in libxlog and thus is not intialized by libxfs. This
> * allows an application to initialize and store a reference to the log
> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> index ac783ce3..ca272387 100644
> --- a/libxfs/rdwr.c
> +++ b/libxfs/rdwr.c
> @@ -812,6 +812,10 @@ libxfs_bwrite(
> return bp->b_error;
> }
>
> + /* Trigger the writeback hook if there is one. */
> + if (bp->b_mount->m_buf_writeback_fn)
> + bp->b_mount->m_buf_writeback_fn(bp);
> +
> /*
> * clear any pre-existing error status on the buffer. This can occur if
> * the buffer is corrupt on disk and the repair process doesn't clear
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index e2e99b21..03b7c242 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -751,6 +751,104 @@ clear_needsrepair(
> libxfs_buf_relse(bp);
> }
>
> +static void
> +update_sb_crc_only(
> + struct xfs_buf *bp)
> +{
> + xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
> +}
> +
> +/* Forcibly write the primary superblock with the NEEDSREPAIR flag set. */
> +static void
> +force_needsrepair(
> + struct xfs_mount *mp)
> +{
> + struct xfs_buf_ops fake_ops;
> + struct xfs_buf *bp;
> + int error;
> +
> + if (!xfs_sb_version_hascrc(&mp->m_sb) ||
> + xfs_sb_version_needsrepair(&mp->m_sb))
> + return;
> +
> + bp = libxfs_getsb(mp);
> + if (!bp || bp->b_error) {
> + do_log(
> + _("couldn't get superblock to set needsrepair, err=%d\n"),
> + bp ? bp->b_error : ENOMEM);
> + } else {
> + /*
> + * It's possible that we need to set NEEDSREPAIR before we've
> + * had a chance to fix the summary counters in the primary sb.
> + * With the exception of those counters, phase 1 already
> + * ensured that the geometry makes sense.
> + *
> + * Bad summary counters in the primary super can cause the
> + * write verifier to fail, so substitute a dummy that only sets
> + * the CRC. In the event of a crash, NEEDSREPAIR will prevent
> + * the kernel from mounting our potentially damaged filesystem
> + * until repair is run again, so it's ok to bypass the usual
> + * verification in this one case.
> + */
> + fake_ops = xfs_sb_buf_ops; /* struct copy */
> + fake_ops.verify_write = update_sb_crc_only;
> +
> + mp->m_sb.sb_features_incompat |=
> + XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
> + libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
> +
> + /* Force the primary super to disk immediately. */
> + bp->b_ops = &fake_ops;
> + error = -libxfs_bwrite(bp);
> + bp->b_ops = &xfs_sb_buf_ops;
> + if (error)
> + do_log(_("couldn't force needsrepair, err=%d\n"), error);
> + }
> + if (bp)
> + libxfs_buf_relse(bp);
> +}
> +
> +/*
> + * Intercept the first non-super write to the filesystem so we can set
> + * NEEDSREPAIR to protect the filesystem from mount in case of a crash.
> + */
> +static void
> +repair_capture_writeback(
> + struct xfs_buf *bp)
> +{
> + struct xfs_mount *mp = bp->b_mount;
> + static pthread_mutex_t wb_mutex = PTHREAD_MUTEX_INITIALIZER;
> +
> + /*
> + * This write hook ignores any buffer that looks like a superblock to
> + * avoid hook recursion when setting NEEDSREPAIR. Higher level code
> + * modifying an sb must control the flag manually.
> + */
> + if (bp->b_ops == &xfs_sb_buf_ops || bp->b_bn == XFS_SB_DADDR)
> + return;
> +
> + pthread_mutex_lock(&wb_mutex);
> +
> + /*
> + * If someone else already dropped the hook, then needsrepair has
> + * already been set on the filesystem and we can unlock.
> + */
> + if (mp->m_buf_writeback_fn != repair_capture_writeback)
> + goto unlock;
> +
> + /*
> + * If we get here, the buffer being written is not a superblock, and
> + * needsrepair needs to be set. The hook is kept in place to plug all
> + * other writes until the sb write finishes.
> + */
> + force_needsrepair(mp);
> +
> + /* We only set needsrepair once, so clear the hook now. */
> + mp->m_buf_writeback_fn = NULL;
> +unlock:
> + pthread_mutex_unlock(&wb_mutex);
> +}
> +
> int
> main(int argc, char **argv)
> {
> @@ -847,6 +945,10 @@ main(int argc, char **argv)
> if (verbose > 2)
> mp->m_flags |= LIBXFS_MOUNT_WANT_CORRUPTED;
>
> + /* Capture the first writeback so that we can set needsrepair. */
> + if (xfs_sb_version_hascrc(&mp->m_sb))
> + mp->m_buf_writeback_fn = repair_capture_writeback;
> +
> /*
> * set XFS-independent status vars from the mount/sb structure
> */
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/4] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem
2021-02-23 3:01 ` [PATCH 1/4] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem Darrick J. Wong
2021-02-24 5:39 ` Allison Henderson
@ 2021-02-25 8:16 ` Christoph Hellwig
1 sibling, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2021-02-25 8:16 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: sandeen, Brian Foster, linux-xfs
> diff --git a/include/xfs_mount.h b/include/xfs_mount.h
> index 75230ca5..f93a9f11 100644
> --- a/include/xfs_mount.h
> +++ b/include/xfs_mount.h
> @@ -11,6 +11,8 @@ struct xfs_inode;
> struct xfs_buftarg;
> struct xfs_da_geometry;
>
> +typedef void (*buf_writeback_fn)(struct xfs_buf *bp);
Any point in adding a typedef that is only used once?
> + if (!bp || bp->b_error) {
> + do_log(
> + _("couldn't get superblock to set needsrepair, err=%d\n"),
> + bp ? bp->b_error : ENOMEM);
Maybe add a goto out_buf_release goto here to avoid the extra level of
indentation for the normal path?
But the code itself looks good, so:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 2/4] libxfs: simulate system failure after a certain number of writes
2021-02-23 3:01 [PATCHSET v3 0/4] xfs_repair: set needsrepair when dirtying filesystems Darrick J. Wong
2021-02-23 3:01 ` [PATCH 1/4] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem Darrick J. Wong
@ 2021-02-23 3:01 ` Darrick J. Wong
2021-02-24 5:39 ` Allison Henderson
2021-02-25 8:17 ` Christoph Hellwig
2021-02-23 3:01 ` [PATCH 3/4] xfs_repair: factor phase transitions into a helper Darrick J. Wong
2021-02-23 3:01 ` [PATCH 4/4] xfs_repair: add post-phase error injection points Darrick J. Wong
3 siblings, 2 replies; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-23 3:01 UTC (permalink / raw)
To: sandeen, djwong; +Cc: Brian Foster, linux-xfs, bfoster
From: Darrick J. Wong <djwong@kernel.org>
Add an error injection knob so that we can simulate system failure after
a certain number of disk writes. This knob is being added so that we
can check repair's behavior after an arbitrary number of tests.
Set LIBXFS_DEBUG_WRITE_CRASH={ddev,logdev,rtdev}=nn in the environment
to make libxfs SIGKILL itself after nn writes to the data, log, or rt
devices. Note that this only applies to xfs_buf writes and zero_range.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
include/linux.h | 13 ++++++++++
libxfs/init.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++---
libxfs/libxfs_io.h | 19 +++++++++++++++
libxfs/rdwr.c | 6 ++++-
4 files changed, 101 insertions(+), 5 deletions(-)
diff --git a/include/linux.h b/include/linux.h
index 03b3278b..7bf59e07 100644
--- a/include/linux.h
+++ b/include/linux.h
@@ -31,6 +31,8 @@
#ifdef OVERRIDE_SYSTEM_FSXATTR
# undef fsxattr
#endif
+#include <unistd.h>
+#include <assert.h>
static __inline__ int xfsctl(const char *path, int fd, int cmd, void *p)
{
@@ -186,6 +188,17 @@ platform_zero_range(
#define platform_zero_range(fd, s, l) (-EOPNOTSUPP)
#endif
+/*
+ * Use SIGKILL to simulate an immediate program crash, without a chance to run
+ * atexit handlers.
+ */
+static inline void
+platform_crash(void)
+{
+ kill(getpid(), SIGKILL);
+ assert(0);
+}
+
/*
* Check whether we have to define FS_IOC_FS[GS]ETXATTR ourselves. These
* are a copy of the definitions moved to linux/uapi/fs.h in the 4.5 kernel,
diff --git a/libxfs/init.c b/libxfs/init.c
index 8a8ce3c4..1ec83791 100644
--- a/libxfs/init.c
+++ b/libxfs/init.c
@@ -590,7 +590,8 @@ libxfs_initialize_perag(
static struct xfs_buftarg *
libxfs_buftarg_alloc(
struct xfs_mount *mp,
- dev_t dev)
+ dev_t dev,
+ unsigned long write_fails)
{
struct xfs_buftarg *btp;
@@ -603,10 +604,29 @@ libxfs_buftarg_alloc(
btp->bt_mount = mp;
btp->bt_bdev = dev;
btp->flags = 0;
+ if (write_fails) {
+ btp->writes_left = write_fails;
+ btp->flags |= XFS_BUFTARG_INJECT_WRITE_FAIL;
+ }
+ pthread_mutex_init(&btp->lock, NULL);
return btp;
}
+enum libxfs_write_failure_nums {
+ WF_DATA = 0,
+ WF_LOG,
+ WF_RT,
+ WF_MAX_OPTS,
+};
+
+static char *wf_opts[] = {
+ [WF_DATA] = "ddev",
+ [WF_LOG] = "logdev",
+ [WF_RT] = "rtdev",
+ [WF_MAX_OPTS] = NULL,
+};
+
void
libxfs_buftarg_init(
struct xfs_mount *mp,
@@ -614,6 +634,46 @@ libxfs_buftarg_init(
dev_t logdev,
dev_t rtdev)
{
+ char *p = getenv("LIBXFS_DEBUG_WRITE_CRASH");
+ unsigned long dfail = 0, lfail = 0, rfail = 0;
+
+ /* Simulate utility crash after a certain number of writes. */
+ while (p && *p) {
+ char *val;
+
+ switch (getsubopt(&p, wf_opts, &val)) {
+ case WF_DATA:
+ if (!val) {
+ fprintf(stderr,
+ _("ddev write fail requires a parameter\n"));
+ exit(1);
+ }
+ dfail = strtoul(val, NULL, 0);
+ break;
+ case WF_LOG:
+ if (!val) {
+ fprintf(stderr,
+ _("logdev write fail requires a parameter\n"));
+ exit(1);
+ }
+ lfail = strtoul(val, NULL, 0);
+ break;
+ case WF_RT:
+ if (!val) {
+ fprintf(stderr,
+ _("rtdev write fail requires a parameter\n"));
+ exit(1);
+ }
+ rfail = strtoul(val, NULL, 0);
+ break;
+ default:
+ fprintf(stderr, _("unknown write fail type %s\n"),
+ val);
+ exit(1);
+ break;
+ }
+ }
+
if (mp->m_ddev_targp) {
/* should already have all buftargs initialised */
if (mp->m_ddev_targp->bt_bdev != dev ||
@@ -647,12 +707,12 @@ libxfs_buftarg_init(
return;
}
- mp->m_ddev_targp = libxfs_buftarg_alloc(mp, dev);
+ mp->m_ddev_targp = libxfs_buftarg_alloc(mp, dev, dfail);
if (!logdev || logdev == dev)
mp->m_logdev_targp = mp->m_ddev_targp;
else
- mp->m_logdev_targp = libxfs_buftarg_alloc(mp, logdev);
- mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, rtdev);
+ mp->m_logdev_targp = libxfs_buftarg_alloc(mp, logdev, lfail);
+ mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, rtdev, rfail);
}
/*
diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
index c80e2d59..3cc4f4ee 100644
--- a/libxfs/libxfs_io.h
+++ b/libxfs/libxfs_io.h
@@ -22,6 +22,8 @@ struct xfs_perag;
*/
struct xfs_buftarg {
struct xfs_mount *bt_mount;
+ pthread_mutex_t lock;
+ unsigned long writes_left;
dev_t bt_bdev;
unsigned int flags;
};
@@ -30,6 +32,23 @@ struct xfs_buftarg {
#define XFS_BUFTARG_LOST_WRITE (1 << 0)
/* A dirty buffer failed the write verifier. */
#define XFS_BUFTARG_CORRUPT_WRITE (1 << 1)
+/* Simulate failure after a certain number of writes. */
+#define XFS_BUFTARG_INJECT_WRITE_FAIL (1 << 2)
+
+/* Simulate the system crashing after a certain number of writes. */
+static inline void
+xfs_buftarg_trip_write(
+ struct xfs_buftarg *btp)
+{
+ if (!(btp->flags & XFS_BUFTARG_INJECT_WRITE_FAIL))
+ return;
+
+ pthread_mutex_lock(&btp->lock);
+ btp->writes_left--;
+ if (!btp->writes_left)
+ platform_crash();
+ pthread_mutex_unlock(&btp->lock);
+}
extern void libxfs_buftarg_init(struct xfs_mount *mp, dev_t ddev,
dev_t logdev, dev_t rtdev);
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index ca272387..fd456d6b 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -74,8 +74,10 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
/* try to use special zeroing methods, fall back to writes if needed */
len_bytes = LIBXFS_BBTOOFF64(len);
error = platform_zero_range(fd, start_offset, len_bytes);
- if (!error)
+ if (!error) {
+ xfs_buftarg_trip_write(btp);
return 0;
+ }
zsize = min(BDSTRAT_SIZE, BBTOB(len));
if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
@@ -105,6 +107,7 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
progname, __FUNCTION__);
exit(1);
}
+ xfs_buftarg_trip_write(btp);
offset += bytes;
}
free(z);
@@ -860,6 +863,7 @@ libxfs_bwrite(
} else {
bp->b_flags |= LIBXFS_B_UPTODATE;
bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED);
+ xfs_buftarg_trip_write(bp->b_target);
}
return bp->b_error;
}
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 2/4] libxfs: simulate system failure after a certain number of writes
2021-02-23 3:01 ` [PATCH 2/4] libxfs: simulate system failure after a certain number of writes Darrick J. Wong
@ 2021-02-24 5:39 ` Allison Henderson
2021-02-25 8:17 ` Christoph Hellwig
1 sibling, 0 replies; 16+ messages in thread
From: Allison Henderson @ 2021-02-24 5:39 UTC (permalink / raw)
To: Darrick J. Wong, sandeen; +Cc: Brian Foster, linux-xfs
On 2/22/21 8:01 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Add an error injection knob so that we can simulate system failure after
> a certain number of disk writes. This knob is being added so that we
> can check repair's behavior after an arbitrary number of tests.
>
> Set LIBXFS_DEBUG_WRITE_CRASH={ddev,logdev,rtdev}=nn in the environment
> to make libxfs SIGKILL itself after nn writes to the data, log, or rt
> devices. Note that this only applies to xfs_buf writes and zero_range.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> Reviewed-by: Brian Foster <bfoster@redhat.com>
Ok, makes sense
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
> ---
> include/linux.h | 13 ++++++++++
> libxfs/init.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++---
> libxfs/libxfs_io.h | 19 +++++++++++++++
> libxfs/rdwr.c | 6 ++++-
> 4 files changed, 101 insertions(+), 5 deletions(-)
>
>
> diff --git a/include/linux.h b/include/linux.h
> index 03b3278b..7bf59e07 100644
> --- a/include/linux.h
> +++ b/include/linux.h
> @@ -31,6 +31,8 @@
> #ifdef OVERRIDE_SYSTEM_FSXATTR
> # undef fsxattr
> #endif
> +#include <unistd.h>
> +#include <assert.h>
>
> static __inline__ int xfsctl(const char *path, int fd, int cmd, void *p)
> {
> @@ -186,6 +188,17 @@ platform_zero_range(
> #define platform_zero_range(fd, s, l) (-EOPNOTSUPP)
> #endif
>
> +/*
> + * Use SIGKILL to simulate an immediate program crash, without a chance to run
> + * atexit handlers.
> + */
> +static inline void
> +platform_crash(void)
> +{
> + kill(getpid(), SIGKILL);
> + assert(0);
> +}
> +
> /*
> * Check whether we have to define FS_IOC_FS[GS]ETXATTR ourselves. These
> * are a copy of the definitions moved to linux/uapi/fs.h in the 4.5 kernel,
> diff --git a/libxfs/init.c b/libxfs/init.c
> index 8a8ce3c4..1ec83791 100644
> --- a/libxfs/init.c
> +++ b/libxfs/init.c
> @@ -590,7 +590,8 @@ libxfs_initialize_perag(
> static struct xfs_buftarg *
> libxfs_buftarg_alloc(
> struct xfs_mount *mp,
> - dev_t dev)
> + dev_t dev,
> + unsigned long write_fails)
> {
> struct xfs_buftarg *btp;
>
> @@ -603,10 +604,29 @@ libxfs_buftarg_alloc(
> btp->bt_mount = mp;
> btp->bt_bdev = dev;
> btp->flags = 0;
> + if (write_fails) {
> + btp->writes_left = write_fails;
> + btp->flags |= XFS_BUFTARG_INJECT_WRITE_FAIL;
> + }
> + pthread_mutex_init(&btp->lock, NULL);
>
> return btp;
> }
>
> +enum libxfs_write_failure_nums {
> + WF_DATA = 0,
> + WF_LOG,
> + WF_RT,
> + WF_MAX_OPTS,
> +};
> +
> +static char *wf_opts[] = {
> + [WF_DATA] = "ddev",
> + [WF_LOG] = "logdev",
> + [WF_RT] = "rtdev",
> + [WF_MAX_OPTS] = NULL,
> +};
> +
> void
> libxfs_buftarg_init(
> struct xfs_mount *mp,
> @@ -614,6 +634,46 @@ libxfs_buftarg_init(
> dev_t logdev,
> dev_t rtdev)
> {
> + char *p = getenv("LIBXFS_DEBUG_WRITE_CRASH");
> + unsigned long dfail = 0, lfail = 0, rfail = 0;
> +
> + /* Simulate utility crash after a certain number of writes. */
> + while (p && *p) {
> + char *val;
> +
> + switch (getsubopt(&p, wf_opts, &val)) {
> + case WF_DATA:
> + if (!val) {
> + fprintf(stderr,
> + _("ddev write fail requires a parameter\n"));
> + exit(1);
> + }
> + dfail = strtoul(val, NULL, 0);
> + break;
> + case WF_LOG:
> + if (!val) {
> + fprintf(stderr,
> + _("logdev write fail requires a parameter\n"));
> + exit(1);
> + }
> + lfail = strtoul(val, NULL, 0);
> + break;
> + case WF_RT:
> + if (!val) {
> + fprintf(stderr,
> + _("rtdev write fail requires a parameter\n"));
> + exit(1);
> + }
> + rfail = strtoul(val, NULL, 0);
> + break;
> + default:
> + fprintf(stderr, _("unknown write fail type %s\n"),
> + val);
> + exit(1);
> + break;
> + }
> + }
> +
> if (mp->m_ddev_targp) {
> /* should already have all buftargs initialised */
> if (mp->m_ddev_targp->bt_bdev != dev ||
> @@ -647,12 +707,12 @@ libxfs_buftarg_init(
> return;
> }
>
> - mp->m_ddev_targp = libxfs_buftarg_alloc(mp, dev);
> + mp->m_ddev_targp = libxfs_buftarg_alloc(mp, dev, dfail);
> if (!logdev || logdev == dev)
> mp->m_logdev_targp = mp->m_ddev_targp;
> else
> - mp->m_logdev_targp = libxfs_buftarg_alloc(mp, logdev);
> - mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, rtdev);
> + mp->m_logdev_targp = libxfs_buftarg_alloc(mp, logdev, lfail);
> + mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, rtdev, rfail);
> }
>
> /*
> diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
> index c80e2d59..3cc4f4ee 100644
> --- a/libxfs/libxfs_io.h
> +++ b/libxfs/libxfs_io.h
> @@ -22,6 +22,8 @@ struct xfs_perag;
> */
> struct xfs_buftarg {
> struct xfs_mount *bt_mount;
> + pthread_mutex_t lock;
> + unsigned long writes_left;
> dev_t bt_bdev;
> unsigned int flags;
> };
> @@ -30,6 +32,23 @@ struct xfs_buftarg {
> #define XFS_BUFTARG_LOST_WRITE (1 << 0)
> /* A dirty buffer failed the write verifier. */
> #define XFS_BUFTARG_CORRUPT_WRITE (1 << 1)
> +/* Simulate failure after a certain number of writes. */
> +#define XFS_BUFTARG_INJECT_WRITE_FAIL (1 << 2)
> +
> +/* Simulate the system crashing after a certain number of writes. */
> +static inline void
> +xfs_buftarg_trip_write(
> + struct xfs_buftarg *btp)
> +{
> + if (!(btp->flags & XFS_BUFTARG_INJECT_WRITE_FAIL))
> + return;
> +
> + pthread_mutex_lock(&btp->lock);
> + btp->writes_left--;
> + if (!btp->writes_left)
> + platform_crash();
> + pthread_mutex_unlock(&btp->lock);
> +}
>
> extern void libxfs_buftarg_init(struct xfs_mount *mp, dev_t ddev,
> dev_t logdev, dev_t rtdev);
> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> index ca272387..fd456d6b 100644
> --- a/libxfs/rdwr.c
> +++ b/libxfs/rdwr.c
> @@ -74,8 +74,10 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
> /* try to use special zeroing methods, fall back to writes if needed */
> len_bytes = LIBXFS_BBTOOFF64(len);
> error = platform_zero_range(fd, start_offset, len_bytes);
> - if (!error)
> + if (!error) {
> + xfs_buftarg_trip_write(btp);
> return 0;
> + }
>
> zsize = min(BDSTRAT_SIZE, BBTOB(len));
> if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
> @@ -105,6 +107,7 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
> progname, __FUNCTION__);
> exit(1);
> }
> + xfs_buftarg_trip_write(btp);
> offset += bytes;
> }
> free(z);
> @@ -860,6 +863,7 @@ libxfs_bwrite(
> } else {
> bp->b_flags |= LIBXFS_B_UPTODATE;
> bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED);
> + xfs_buftarg_trip_write(bp->b_target);
> }
> return bp->b_error;
> }
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/4] libxfs: simulate system failure after a certain number of writes
2021-02-23 3:01 ` [PATCH 2/4] libxfs: simulate system failure after a certain number of writes Darrick J. Wong
2021-02-24 5:39 ` Allison Henderson
@ 2021-02-25 8:17 ` Christoph Hellwig
1 sibling, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2021-02-25 8:17 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: sandeen, Brian Foster, linux-xfs
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 3/4] xfs_repair: factor phase transitions into a helper
2021-02-23 3:01 [PATCHSET v3 0/4] xfs_repair: set needsrepair when dirtying filesystems Darrick J. Wong
2021-02-23 3:01 ` [PATCH 1/4] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem Darrick J. Wong
2021-02-23 3:01 ` [PATCH 2/4] libxfs: simulate system failure after a certain number of writes Darrick J. Wong
@ 2021-02-23 3:01 ` Darrick J. Wong
2021-02-24 5:39 ` Allison Henderson
2021-02-25 8:18 ` Christoph Hellwig
2021-02-23 3:01 ` [PATCH 4/4] xfs_repair: add post-phase error injection points Darrick J. Wong
3 siblings, 2 replies; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-23 3:01 UTC (permalink / raw)
To: sandeen, djwong; +Cc: Eric Sandeen, Brian Foster, linux-xfs, bfoster
From: Darrick J. Wong <djwong@kernel.org>
Create a helper function to centralize all the stuff we do at the end of
a repair phase (which for now is limited to reporting progress). The
next patch will add more interesting things to this helper.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
repair/xfs_repair.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 03b7c242..a9236bb7 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -849,6 +849,12 @@ repair_capture_writeback(
pthread_mutex_unlock(&wb_mutex);
}
+static inline void
+phase_end(int phase)
+{
+ timestamp(PHASE_END, phase, NULL);
+}
+
int
main(int argc, char **argv)
{
@@ -878,7 +884,7 @@ main(int argc, char **argv)
msgbuf = malloc(DURATION_BUF_SIZE);
timestamp(PHASE_START, 0, NULL);
- timestamp(PHASE_END, 0, NULL);
+ phase_end(0);
/* -f forces this, but let's be nice and autodetect it, as well. */
if (!isa_file) {
@@ -901,7 +907,7 @@ main(int argc, char **argv)
/* do phase1 to make sure we have a superblock */
phase1(temp_mp);
- timestamp(PHASE_END, 1, NULL);
+ phase_end(1);
if (no_modify && primary_sb_modified) {
do_warn(_("Primary superblock would have been modified.\n"
@@ -1127,23 +1133,23 @@ main(int argc, char **argv)
/* make sure the per-ag freespace maps are ok so we can mount the fs */
phase2(mp, phase2_threads);
- timestamp(PHASE_END, 2, NULL);
+ phase_end(2);
if (do_prefetch)
init_prefetch(mp);
phase3(mp, phase2_threads);
- timestamp(PHASE_END, 3, NULL);
+ phase_end(3);
phase4(mp);
- timestamp(PHASE_END, 4, NULL);
+ phase_end(4);
if (no_modify)
printf(_("No modify flag set, skipping phase 5\n"));
else {
phase5(mp);
}
- timestamp(PHASE_END, 5, NULL);
+ phase_end(5);
/*
* Done with the block usage maps, toss them...
@@ -1153,10 +1159,10 @@ main(int argc, char **argv)
if (!bad_ino_btree) {
phase6(mp);
- timestamp(PHASE_END, 6, NULL);
+ phase_end(6);
phase7(mp, phase2_threads);
- timestamp(PHASE_END, 7, NULL);
+ phase_end(7);
} else {
do_warn(
_("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 3/4] xfs_repair: factor phase transitions into a helper
2021-02-23 3:01 ` [PATCH 3/4] xfs_repair: factor phase transitions into a helper Darrick J. Wong
@ 2021-02-24 5:39 ` Allison Henderson
2021-02-25 8:18 ` Christoph Hellwig
1 sibling, 0 replies; 16+ messages in thread
From: Allison Henderson @ 2021-02-24 5:39 UTC (permalink / raw)
To: Darrick J. Wong, sandeen; +Cc: Eric Sandeen, Brian Foster, linux-xfs
On 2/22/21 8:01 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Create a helper function to centralize all the stuff we do at the end of
> a repair phase (which for now is limited to reporting progress). The
> next patch will add more interesting things to this helper.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> Reviewed-by: Eric Sandeen <sandeen@redhat.com>
> Reviewed-by: Brian Foster <bfoster@redhat.com>
Looks ok:
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
> ---
> repair/xfs_repair.c | 22 ++++++++++++++--------
> 1 file changed, 14 insertions(+), 8 deletions(-)
>
>
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 03b7c242..a9236bb7 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -849,6 +849,12 @@ repair_capture_writeback(
> pthread_mutex_unlock(&wb_mutex);
> }
>
> +static inline void
> +phase_end(int phase)
> +{
> + timestamp(PHASE_END, phase, NULL);
> +}
> +
> int
> main(int argc, char **argv)
> {
> @@ -878,7 +884,7 @@ main(int argc, char **argv)
> msgbuf = malloc(DURATION_BUF_SIZE);
>
> timestamp(PHASE_START, 0, NULL);
> - timestamp(PHASE_END, 0, NULL);
> + phase_end(0);
>
> /* -f forces this, but let's be nice and autodetect it, as well. */
> if (!isa_file) {
> @@ -901,7 +907,7 @@ main(int argc, char **argv)
>
> /* do phase1 to make sure we have a superblock */
> phase1(temp_mp);
> - timestamp(PHASE_END, 1, NULL);
> + phase_end(1);
>
> if (no_modify && primary_sb_modified) {
> do_warn(_("Primary superblock would have been modified.\n"
> @@ -1127,23 +1133,23 @@ main(int argc, char **argv)
>
> /* make sure the per-ag freespace maps are ok so we can mount the fs */
> phase2(mp, phase2_threads);
> - timestamp(PHASE_END, 2, NULL);
> + phase_end(2);
>
> if (do_prefetch)
> init_prefetch(mp);
>
> phase3(mp, phase2_threads);
> - timestamp(PHASE_END, 3, NULL);
> + phase_end(3);
>
> phase4(mp);
> - timestamp(PHASE_END, 4, NULL);
> + phase_end(4);
>
> if (no_modify)
> printf(_("No modify flag set, skipping phase 5\n"));
> else {
> phase5(mp);
> }
> - timestamp(PHASE_END, 5, NULL);
> + phase_end(5);
>
> /*
> * Done with the block usage maps, toss them...
> @@ -1153,10 +1159,10 @@ main(int argc, char **argv)
>
> if (!bad_ino_btree) {
> phase6(mp);
> - timestamp(PHASE_END, 6, NULL);
> + phase_end(6);
>
> phase7(mp, phase2_threads);
> - timestamp(PHASE_END, 7, NULL);
> + phase_end(7);
> } else {
> do_warn(
> _("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 3/4] xfs_repair: factor phase transitions into a helper
2021-02-23 3:01 ` [PATCH 3/4] xfs_repair: factor phase transitions into a helper Darrick J. Wong
2021-02-24 5:39 ` Allison Henderson
@ 2021-02-25 8:18 ` Christoph Hellwig
1 sibling, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2021-02-25 8:18 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: sandeen, Eric Sandeen, Brian Foster, linux-xfs
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 4/4] xfs_repair: add post-phase error injection points
2021-02-23 3:01 [PATCHSET v3 0/4] xfs_repair: set needsrepair when dirtying filesystems Darrick J. Wong
` (2 preceding siblings ...)
2021-02-23 3:01 ` [PATCH 3/4] xfs_repair: factor phase transitions into a helper Darrick J. Wong
@ 2021-02-23 3:01 ` Darrick J. Wong
2021-02-24 5:39 ` Allison Henderson
2021-02-25 8:19 ` Christoph Hellwig
3 siblings, 2 replies; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-23 3:01 UTC (permalink / raw)
To: sandeen, djwong; +Cc: Eric Sandeen, Brian Foster, linux-xfs, bfoster
From: Darrick J. Wong <djwong@kernel.org>
Create an error injection point so that we can simulate repair failing
after a certain phase.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
repair/globals.c | 3 +++
repair/globals.h | 3 +++
repair/xfs_repair.c | 8 ++++++++
3 files changed, 14 insertions(+)
diff --git a/repair/globals.c b/repair/globals.c
index 110d98b6..537d068b 100644
--- a/repair/globals.c
+++ b/repair/globals.c
@@ -117,3 +117,6 @@ uint64_t *prog_rpt_done;
int ag_stride;
int thread_count;
+
+/* If nonzero, simulate failure after this phase. */
+int fail_after_phase;
diff --git a/repair/globals.h b/repair/globals.h
index 1d397b35..a9287320 100644
--- a/repair/globals.h
+++ b/repair/globals.h
@@ -162,4 +162,7 @@ extern uint64_t *prog_rpt_done;
extern int ag_stride;
extern int thread_count;
+/* If nonzero, simulate failure after this phase. */
+extern int fail_after_phase;
+
#endif /* _XFS_REPAIR_GLOBAL_H */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index a9236bb7..64d7607f 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -362,6 +362,10 @@ process_args(int argc, char **argv)
if (report_corrected && no_modify)
usage();
+
+ p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
+ if (p)
+ fail_after_phase = (int)strtol(p, NULL, 0);
}
void __attribute__((noreturn))
@@ -853,6 +857,10 @@ static inline void
phase_end(int phase)
{
timestamp(PHASE_END, phase, NULL);
+
+ /* Fail if someone injected an post-phase error. */
+ if (fail_after_phase && phase == fail_after_phase)
+ platform_crash();
}
int
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 4/4] xfs_repair: add post-phase error injection points
2021-02-23 3:01 ` [PATCH 4/4] xfs_repair: add post-phase error injection points Darrick J. Wong
@ 2021-02-24 5:39 ` Allison Henderson
2021-02-25 8:19 ` Christoph Hellwig
1 sibling, 0 replies; 16+ messages in thread
From: Allison Henderson @ 2021-02-24 5:39 UTC (permalink / raw)
To: Darrick J. Wong, sandeen; +Cc: Eric Sandeen, Brian Foster, linux-xfs
On 2/22/21 8:01 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Create an error injection point so that we can simulate repair failing
> after a certain phase.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> Reviewed-by: Eric Sandeen <sandeen@redhat.com>
> Reviewed-by: Brian Foster <bfoster@redhat.com>
Looks fine
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
> ---
> repair/globals.c | 3 +++
> repair/globals.h | 3 +++
> repair/xfs_repair.c | 8 ++++++++
> 3 files changed, 14 insertions(+)
>
>
> diff --git a/repair/globals.c b/repair/globals.c
> index 110d98b6..537d068b 100644
> --- a/repair/globals.c
> +++ b/repair/globals.c
> @@ -117,3 +117,6 @@ uint64_t *prog_rpt_done;
>
> int ag_stride;
> int thread_count;
> +
> +/* If nonzero, simulate failure after this phase. */
> +int fail_after_phase;
> diff --git a/repair/globals.h b/repair/globals.h
> index 1d397b35..a9287320 100644
> --- a/repair/globals.h
> +++ b/repair/globals.h
> @@ -162,4 +162,7 @@ extern uint64_t *prog_rpt_done;
> extern int ag_stride;
> extern int thread_count;
>
> +/* If nonzero, simulate failure after this phase. */
> +extern int fail_after_phase;
> +
> #endif /* _XFS_REPAIR_GLOBAL_H */
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index a9236bb7..64d7607f 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -362,6 +362,10 @@ process_args(int argc, char **argv)
>
> if (report_corrected && no_modify)
> usage();
> +
> + p = getenv("XFS_REPAIR_FAIL_AFTER_PHASE");
> + if (p)
> + fail_after_phase = (int)strtol(p, NULL, 0);
> }
>
> void __attribute__((noreturn))
> @@ -853,6 +857,10 @@ static inline void
> phase_end(int phase)
> {
> timestamp(PHASE_END, phase, NULL);
> +
> + /* Fail if someone injected an post-phase error. */
> + if (fail_after_phase && phase == fail_after_phase)
> + platform_crash();
> }
>
> int
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 4/4] xfs_repair: add post-phase error injection points
2021-02-23 3:01 ` [PATCH 4/4] xfs_repair: add post-phase error injection points Darrick J. Wong
2021-02-24 5:39 ` Allison Henderson
@ 2021-02-25 8:19 ` Christoph Hellwig
1 sibling, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2021-02-25 8:19 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: sandeen, Eric Sandeen, Brian Foster, linux-xfs
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 3/4] xfs_repair: factor phase transitions into a helper
2021-02-19 3:17 [PATCHSET v2 0/4] xfs_repair: set needsrepair when dirtying filesystems Darrick J. Wong
@ 2021-02-19 3:18 ` Darrick J. Wong
2021-02-20 0:58 ` Eric Sandeen
2021-02-22 14:11 ` Brian Foster
0 siblings, 2 replies; 16+ messages in thread
From: Darrick J. Wong @ 2021-02-19 3:18 UTC (permalink / raw)
To: sandeen, djwong; +Cc: linux-xfs, bfoster
From: Darrick J. Wong <djwong@kernel.org>
Create a helper function to centralize all the stuff we do at the end of
a repair phase (which for now is limited to reporting progress). The
next patch will add more interesting things to this helper.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
repair/xfs_repair.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 8eb7da53..891b3b23 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -847,6 +847,12 @@ repair_capture_writeback(
pthread_mutex_unlock(&wb_mutex);
}
+static inline void
+phase_end(int phase)
+{
+ timestamp(PHASE_END, phase, NULL);
+}
+
int
main(int argc, char **argv)
{
@@ -876,7 +882,7 @@ main(int argc, char **argv)
msgbuf = malloc(DURATION_BUF_SIZE);
timestamp(PHASE_START, 0, NULL);
- timestamp(PHASE_END, 0, NULL);
+ phase_end(0);
/* -f forces this, but let's be nice and autodetect it, as well. */
if (!isa_file) {
@@ -899,7 +905,7 @@ main(int argc, char **argv)
/* do phase1 to make sure we have a superblock */
phase1(temp_mp);
- timestamp(PHASE_END, 1, NULL);
+ phase_end(1);
if (no_modify && primary_sb_modified) {
do_warn(_("Primary superblock would have been modified.\n"
@@ -1125,23 +1131,23 @@ main(int argc, char **argv)
/* make sure the per-ag freespace maps are ok so we can mount the fs */
phase2(mp, phase2_threads);
- timestamp(PHASE_END, 2, NULL);
+ phase_end(2);
if (do_prefetch)
init_prefetch(mp);
phase3(mp, phase2_threads);
- timestamp(PHASE_END, 3, NULL);
+ phase_end(3);
phase4(mp);
- timestamp(PHASE_END, 4, NULL);
+ phase_end(4);
if (no_modify)
printf(_("No modify flag set, skipping phase 5\n"));
else {
phase5(mp);
}
- timestamp(PHASE_END, 5, NULL);
+ phase_end(5);
/*
* Done with the block usage maps, toss them...
@@ -1151,10 +1157,10 @@ main(int argc, char **argv)
if (!bad_ino_btree) {
phase6(mp);
- timestamp(PHASE_END, 6, NULL);
+ phase_end(6);
phase7(mp, phase2_threads);
- timestamp(PHASE_END, 7, NULL);
+ phase_end(7);
} else {
do_warn(
_("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 3/4] xfs_repair: factor phase transitions into a helper
2021-02-19 3:18 ` [PATCH 3/4] xfs_repair: factor phase transitions into a helper Darrick J. Wong
@ 2021-02-20 0:58 ` Eric Sandeen
2021-02-22 14:11 ` Brian Foster
1 sibling, 0 replies; 16+ messages in thread
From: Eric Sandeen @ 2021-02-20 0:58 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: linux-xfs, bfoster
On 2/18/21 9:18 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Create a helper function to centralize all the stuff we do at the end of
> a repair phase (which for now is limited to reporting progress). The
> next patch will add more interesting things to this helper.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> repair/xfs_repair.c | 22 ++++++++++++++--------
> 1 file changed, 14 insertions(+), 8 deletions(-)
>
>
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 8eb7da53..891b3b23 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -847,6 +847,12 @@ repair_capture_writeback(
> pthread_mutex_unlock(&wb_mutex);
> }
>
> +static inline void
> +phase_end(int phase)
> +{
> + timestamp(PHASE_END, phase, NULL);
(future cleanup, remove the unused 3rd arg from this function and make
it a void....)
otherwise this is a trivial no-op, congratulations you gat an RVB! ;)
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
> +}
> +
> int
> main(int argc, char **argv)
> {
> @@ -876,7 +882,7 @@ main(int argc, char **argv)
> msgbuf = malloc(DURATION_BUF_SIZE);
>
> timestamp(PHASE_START, 0, NULL);
> - timestamp(PHASE_END, 0, NULL);
> + phase_end(0);
>
> /* -f forces this, but let's be nice and autodetect it, as well. */
> if (!isa_file) {
> @@ -899,7 +905,7 @@ main(int argc, char **argv)
>
> /* do phase1 to make sure we have a superblock */
> phase1(temp_mp);
> - timestamp(PHASE_END, 1, NULL);
> + phase_end(1);
>
> if (no_modify && primary_sb_modified) {
> do_warn(_("Primary superblock would have been modified.\n"
> @@ -1125,23 +1131,23 @@ main(int argc, char **argv)
>
> /* make sure the per-ag freespace maps are ok so we can mount the fs */
> phase2(mp, phase2_threads);
> - timestamp(PHASE_END, 2, NULL);
> + phase_end(2);
>
> if (do_prefetch)
> init_prefetch(mp);
>
> phase3(mp, phase2_threads);
> - timestamp(PHASE_END, 3, NULL);
> + phase_end(3);
>
> phase4(mp);
> - timestamp(PHASE_END, 4, NULL);
> + phase_end(4);
>
> if (no_modify)
> printf(_("No modify flag set, skipping phase 5\n"));
> else {
> phase5(mp);
> }
> - timestamp(PHASE_END, 5, NULL);
> + phase_end(5);
>
> /*
> * Done with the block usage maps, toss them...
> @@ -1151,10 +1157,10 @@ main(int argc, char **argv)
>
> if (!bad_ino_btree) {
> phase6(mp);
> - timestamp(PHASE_END, 6, NULL);
> + phase_end(6);
>
> phase7(mp, phase2_threads);
> - timestamp(PHASE_END, 7, NULL);
> + phase_end(7);
> } else {
> do_warn(
> _("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 3/4] xfs_repair: factor phase transitions into a helper
2021-02-19 3:18 ` [PATCH 3/4] xfs_repair: factor phase transitions into a helper Darrick J. Wong
2021-02-20 0:58 ` Eric Sandeen
@ 2021-02-22 14:11 ` Brian Foster
1 sibling, 0 replies; 16+ messages in thread
From: Brian Foster @ 2021-02-22 14:11 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: sandeen, linux-xfs
On Thu, Feb 18, 2021 at 07:18:10PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Create a helper function to centralize all the stuff we do at the end of
> a repair phase (which for now is limited to reporting progress). The
> next patch will add more interesting things to this helper.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
Reviewed-by: Brian Foster <bfoster@redhat.com>
> repair/xfs_repair.c | 22 ++++++++++++++--------
> 1 file changed, 14 insertions(+), 8 deletions(-)
>
>
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 8eb7da53..891b3b23 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -847,6 +847,12 @@ repair_capture_writeback(
> pthread_mutex_unlock(&wb_mutex);
> }
>
> +static inline void
> +phase_end(int phase)
> +{
> + timestamp(PHASE_END, phase, NULL);
> +}
> +
> int
> main(int argc, char **argv)
> {
> @@ -876,7 +882,7 @@ main(int argc, char **argv)
> msgbuf = malloc(DURATION_BUF_SIZE);
>
> timestamp(PHASE_START, 0, NULL);
> - timestamp(PHASE_END, 0, NULL);
> + phase_end(0);
>
> /* -f forces this, but let's be nice and autodetect it, as well. */
> if (!isa_file) {
> @@ -899,7 +905,7 @@ main(int argc, char **argv)
>
> /* do phase1 to make sure we have a superblock */
> phase1(temp_mp);
> - timestamp(PHASE_END, 1, NULL);
> + phase_end(1);
>
> if (no_modify && primary_sb_modified) {
> do_warn(_("Primary superblock would have been modified.\n"
> @@ -1125,23 +1131,23 @@ main(int argc, char **argv)
>
> /* make sure the per-ag freespace maps are ok so we can mount the fs */
> phase2(mp, phase2_threads);
> - timestamp(PHASE_END, 2, NULL);
> + phase_end(2);
>
> if (do_prefetch)
> init_prefetch(mp);
>
> phase3(mp, phase2_threads);
> - timestamp(PHASE_END, 3, NULL);
> + phase_end(3);
>
> phase4(mp);
> - timestamp(PHASE_END, 4, NULL);
> + phase_end(4);
>
> if (no_modify)
> printf(_("No modify flag set, skipping phase 5\n"));
> else {
> phase5(mp);
> }
> - timestamp(PHASE_END, 5, NULL);
> + phase_end(5);
>
> /*
> * Done with the block usage maps, toss them...
> @@ -1151,10 +1157,10 @@ main(int argc, char **argv)
>
> if (!bad_ino_btree) {
> phase6(mp);
> - timestamp(PHASE_END, 6, NULL);
> + phase_end(6);
>
> phase7(mp, phase2_threads);
> - timestamp(PHASE_END, 7, NULL);
> + phase_end(7);
> } else {
> do_warn(
> _("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
>
^ permalink raw reply [flat|nested] 16+ messages in thread