From: Brian Foster <bfoster@redhat.com>
To: "Darrick J. Wong" <djwong@kernel.org>
Cc: sandeen@sandeen.net, linux-xfs@vger.kernel.org
Subject: Re: [PATCH 2/4] libxfs: simulate system failure after a certain number of writes
Date: Mon, 22 Feb 2021 09:11:32 -0500 [thread overview]
Message-ID: <20210222141132.GB886774@bfoster> (raw)
In-Reply-To: <161370468470.2389661.11874247132336274370.stgit@magnolia>
On Thu, Feb 18, 2021 at 07:18:04PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Add an error injection knob so that we can simulate system failure after
> a certain number of disk writes. This knob is being added so that we
> can check repair's behavior after an arbitrary number of tests.
>
> Set LIBXFS_DEBUG_WRITE_CRASH={ddev,logdev,rtdev}=nn in the environment
> to make libxfs SIGKILL itself after nn writes to the data, log, or rt
> devices. Note that this only applies to xfs_buf writes and zero_range.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
Modulo Eric's feedback:
Reviewed-by: Brian Foster <bfoster@redhat.com>
> include/linux.h | 13 ++++++++++
> libxfs/init.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++---
> libxfs/libxfs_io.h | 19 +++++++++++++++
> libxfs/rdwr.c | 6 ++++-
> 4 files changed, 101 insertions(+), 5 deletions(-)
>
>
> diff --git a/include/linux.h b/include/linux.h
> index 03b3278b..7bf59e07 100644
> --- a/include/linux.h
> +++ b/include/linux.h
> @@ -31,6 +31,8 @@
> #ifdef OVERRIDE_SYSTEM_FSXATTR
> # undef fsxattr
> #endif
> +#include <unistd.h>
> +#include <assert.h>
>
> static __inline__ int xfsctl(const char *path, int fd, int cmd, void *p)
> {
> @@ -186,6 +188,17 @@ platform_zero_range(
> #define platform_zero_range(fd, s, l) (-EOPNOTSUPP)
> #endif
>
> +/*
> + * Use SIGKILL to simulate an immediate program crash, without a chance to run
> + * atexit handlers.
> + */
> +static inline void
> +platform_crash(void)
> +{
> + kill(getpid(), SIGKILL);
> + assert(0);
> +}
> +
> /*
> * Check whether we have to define FS_IOC_FS[GS]ETXATTR ourselves. These
> * are a copy of the definitions moved to linux/uapi/fs.h in the 4.5 kernel,
> diff --git a/libxfs/init.c b/libxfs/init.c
> index 8a8ce3c4..1ec83791 100644
> --- a/libxfs/init.c
> +++ b/libxfs/init.c
> @@ -590,7 +590,8 @@ libxfs_initialize_perag(
> static struct xfs_buftarg *
> libxfs_buftarg_alloc(
> struct xfs_mount *mp,
> - dev_t dev)
> + dev_t dev,
> + unsigned long write_fails)
> {
> struct xfs_buftarg *btp;
>
> @@ -603,10 +604,29 @@ libxfs_buftarg_alloc(
> btp->bt_mount = mp;
> btp->bt_bdev = dev;
> btp->flags = 0;
> + if (write_fails) {
> + btp->writes_left = write_fails;
> + btp->flags |= XFS_BUFTARG_INJECT_WRITE_FAIL;
> + }
> + pthread_mutex_init(&btp->lock, NULL);
>
> return btp;
> }
>
> +enum libxfs_write_failure_nums {
> + WF_DATA = 0,
> + WF_LOG,
> + WF_RT,
> + WF_MAX_OPTS,
> +};
> +
> +static char *wf_opts[] = {
> + [WF_DATA] = "ddev",
> + [WF_LOG] = "logdev",
> + [WF_RT] = "rtdev",
> + [WF_MAX_OPTS] = NULL,
> +};
> +
> void
> libxfs_buftarg_init(
> struct xfs_mount *mp,
> @@ -614,6 +634,46 @@ libxfs_buftarg_init(
> dev_t logdev,
> dev_t rtdev)
> {
> + char *p = getenv("LIBXFS_DEBUG_WRITE_CRASH");
> + unsigned long dfail = 0, lfail = 0, rfail = 0;
> +
> + /* Simulate utility crash after a certain number of writes. */
> + while (p && *p) {
> + char *val;
> +
> + switch (getsubopt(&p, wf_opts, &val)) {
> + case WF_DATA:
> + if (!val) {
> + fprintf(stderr,
> + _("ddev write fail requires a parameter\n"));
> + exit(1);
> + }
> + dfail = strtoul(val, NULL, 0);
> + break;
> + case WF_LOG:
> + if (!val) {
> + fprintf(stderr,
> + _("logdev write fail requires a parameter\n"));
> + exit(1);
> + }
> + lfail = strtoul(val, NULL, 0);
> + break;
> + case WF_RT:
> + if (!val) {
> + fprintf(stderr,
> + _("rtdev write fail requires a parameter\n"));
> + exit(1);
> + }
> + rfail = strtoul(val, NULL, 0);
> + break;
> + default:
> + fprintf(stderr, _("unknown write fail type %s\n"),
> + val);
> + exit(1);
> + break;
> + }
> + }
> +
> if (mp->m_ddev_targp) {
> /* should already have all buftargs initialised */
> if (mp->m_ddev_targp->bt_bdev != dev ||
> @@ -647,12 +707,12 @@ libxfs_buftarg_init(
> return;
> }
>
> - mp->m_ddev_targp = libxfs_buftarg_alloc(mp, dev);
> + mp->m_ddev_targp = libxfs_buftarg_alloc(mp, dev, dfail);
> if (!logdev || logdev == dev)
> mp->m_logdev_targp = mp->m_ddev_targp;
> else
> - mp->m_logdev_targp = libxfs_buftarg_alloc(mp, logdev);
> - mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, rtdev);
> + mp->m_logdev_targp = libxfs_buftarg_alloc(mp, logdev, lfail);
> + mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, rtdev, rfail);
> }
>
> /*
> diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
> index c80e2d59..3cc4f4ee 100644
> --- a/libxfs/libxfs_io.h
> +++ b/libxfs/libxfs_io.h
> @@ -22,6 +22,8 @@ struct xfs_perag;
> */
> struct xfs_buftarg {
> struct xfs_mount *bt_mount;
> + pthread_mutex_t lock;
> + unsigned long writes_left;
> dev_t bt_bdev;
> unsigned int flags;
> };
> @@ -30,6 +32,23 @@ struct xfs_buftarg {
> #define XFS_BUFTARG_LOST_WRITE (1 << 0)
> /* A dirty buffer failed the write verifier. */
> #define XFS_BUFTARG_CORRUPT_WRITE (1 << 1)
> +/* Simulate failure after a certain number of writes. */
> +#define XFS_BUFTARG_INJECT_WRITE_FAIL (1 << 2)
> +
> +/* Simulate the system crashing after a certain number of writes. */
> +static inline void
> +xfs_buftarg_trip_write(
> + struct xfs_buftarg *btp)
> +{
> + if (!(btp->flags & XFS_BUFTARG_INJECT_WRITE_FAIL))
> + return;
> +
> + pthread_mutex_lock(&btp->lock);
> + btp->writes_left--;
> + if (!btp->writes_left)
> + platform_crash();
> + pthread_mutex_unlock(&btp->lock);
> +}
>
> extern void libxfs_buftarg_init(struct xfs_mount *mp, dev_t ddev,
> dev_t logdev, dev_t rtdev);
> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> index ca272387..fd456d6b 100644
> --- a/libxfs/rdwr.c
> +++ b/libxfs/rdwr.c
> @@ -74,8 +74,10 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
> /* try to use special zeroing methods, fall back to writes if needed */
> len_bytes = LIBXFS_BBTOOFF64(len);
> error = platform_zero_range(fd, start_offset, len_bytes);
> - if (!error)
> + if (!error) {
> + xfs_buftarg_trip_write(btp);
> return 0;
> + }
>
> zsize = min(BDSTRAT_SIZE, BBTOB(len));
> if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
> @@ -105,6 +107,7 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
> progname, __FUNCTION__);
> exit(1);
> }
> + xfs_buftarg_trip_write(btp);
> offset += bytes;
> }
> free(z);
> @@ -860,6 +863,7 @@ libxfs_bwrite(
> } else {
> bp->b_flags |= LIBXFS_B_UPTODATE;
> bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED);
> + xfs_buftarg_trip_write(bp->b_target);
> }
> return bp->b_error;
> }
>
next prev parent reply other threads:[~2021-02-22 14:13 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-19 3:17 [PATCHSET v2 0/4] xfs_repair: set needsrepair when dirtying filesystems Darrick J. Wong
2021-02-19 3:17 ` [PATCH 1/4] xfs_repair: set NEEDSREPAIR the first time we write to a filesystem Darrick J. Wong
2021-02-20 0:32 ` Eric Sandeen
2021-02-20 0:47 ` Darrick J. Wong
2021-02-22 14:11 ` Brian Foster
2021-02-19 3:18 ` [PATCH 2/4] libxfs: simulate system failure after a certain number of writes Darrick J. Wong
2021-02-20 0:51 ` Eric Sandeen
2021-02-20 1:15 ` Darrick J. Wong
2021-02-22 14:11 ` Brian Foster [this message]
2021-02-19 3:18 ` [PATCH 3/4] xfs_repair: factor phase transitions into a helper Darrick J. Wong
2021-02-20 0:58 ` Eric Sandeen
2021-02-22 14:11 ` Brian Foster
2021-02-19 3:18 ` [PATCH 4/4] xfs_repair: add post-phase error injection points Darrick J. Wong
2021-02-20 1:00 ` Eric Sandeen
2021-02-22 14:11 ` Brian Foster
2021-02-23 3:01 [PATCHSET v3 0/4] xfs_repair: set needsrepair when dirtying filesystems Darrick J. Wong
2021-02-23 3:01 ` [PATCH 2/4] libxfs: simulate system failure after a certain number of writes Darrick J. Wong
2021-02-24 5:39 ` Allison Henderson
2021-02-25 8:17 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210222141132.GB886774@bfoster \
--to=bfoster@redhat.com \
--cc=djwong@kernel.org \
--cc=linux-xfs@vger.kernel.org \
--cc=sandeen@sandeen.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).