All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] generic: test for seeing unseen fsync errors on newly open files
@ 2018-04-27 16:38 Jeff Layton
  2018-04-27 16:58 ` Andres Freund
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Jeff Layton @ 2018-04-27 16:38 UTC (permalink / raw)
  To: fstests, eguan; +Cc: willy, andres, david

From: Jeff Layton <jlayton@redhat.com>

This adds a regression test for the following kernel patch:

    errseq: Always report a writeback error once

This is motivated by some rather odd behavior done by the PostgreSQL
project. The main database writers will offload the fsync calls to a
separate process, which can open files after a writeback error has
already occurred.

This used to work with older kernels that reported the error to only
one fd, but with the errseq_t changes we lost the ability to see
errors that occurred before the open. The above patch restores that
behavior.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---

This patch currently fails on mainline kernels, but I'll be sending
a pull request to Linus in the near future for the above patch.

 src/Makefile               |   2 +-
 src/fsync-open-after-err.c | 167 +++++++++++++++++++++++++++++++++++++++++++++
 tests/generic/999          |  95 ++++++++++++++++++++++++++
 tests/generic/999.out      |   3 +
 tests/generic/group        |   1 +
 5 files changed, 267 insertions(+), 1 deletion(-)
 create mode 100644 src/fsync-open-after-err.c
 create mode 100755 tests/generic/999
 create mode 100644 tests/generic/999.out

diff --git a/src/Makefile b/src/Makefile
index 0d3feae1eeb2..3dc9b0da9c3a 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -15,7 +15,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
 	holetest t_truncate_self t_mmap_dio af_unix t_mmap_stale_pmd \
 	t_mmap_cow_race t_mmap_fallocate fsync-err t_mmap_write_ro \
 	t_ext4_dax_journal_corruption t_ext4_dax_inline_corruption \
-	t_ofd_locks
+	t_ofd_locks fsync-open-after-err
 
 LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
 	preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
diff --git a/src/fsync-open-after-err.c b/src/fsync-open-after-err.c
new file mode 100644
index 000000000000..3dcf936eb94a
--- /dev/null
+++ b/src/fsync-open-after-err.c
@@ -0,0 +1,167 @@
+/*
+ * fsync-open-after-err.c: test whether a writeback error that occurred before
+ * 		an open is still reported by fsync on the newly opened fd
+ *
+ * Copyright (c) 2017: Jeff Layton <jlayton@redhat.com>
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <stdbool.h>
+
+/*
+ * btrfs has a fixed stripewidth of 64k, so we need to write enough data to
+ * ensure that we hit both stripes by default.
+ */
+#define DEFAULT_BUFSIZE (65 * 1024)
+
+/* default number of fds to open */
+#define DEFAULT_NUM_FDS	10
+
+bool use_sync_file_range;
+
+static void usage()
+{
+	printf("Usage: fsync-open-after-err [ -b bufsize ] -d dmerror path <filename>\n");
+}
+
+int main(int argc, char **argv)
+{
+	int ret, i, fd1, fd2;
+	char *fname, *buf;
+	char *dmerror_path = NULL;
+	char *cmdbuf;
+	size_t cmdsize, bufsize = DEFAULT_BUFSIZE;
+
+	while ((i = getopt(argc, argv, "b:d:n:sS")) != -1) {
+		switch (i) {
+		case 'b':
+			bufsize = strtol(optarg, &buf, 0);
+			if (*buf != '\0') {
+				printf("bad string conversion: %s\n", optarg);
+				return 1;
+			}
+			break;
+		case 'd':
+			dmerror_path = optarg;
+			break;
+		}
+	}
+
+	if (argc < 1) {
+		usage();
+		return 1;
+	}
+
+	if (!dmerror_path) {
+		printf("Must specify dmerror path with -d option!\n");
+		return 1;
+	}
+
+	/* Remaining argument is filename */
+	fname = argv[optind];
+
+	fd1 = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	if (fd1 < 0) {
+		printf("open of fd1 failed: %m\n");
+		return 1;
+	}
+
+	buf = malloc(bufsize);
+	if (!buf) {
+		printf("malloc failed: %m\n");
+		return 1;
+	}
+
+	/* fill it with some junk */
+	memset(buf, 0x7c, bufsize);
+
+	ret = pwrite(fd1, buf, bufsize, 0);
+	if (ret < 0) {
+		printf("First write on fd1 failed: %m\n");
+		return 1;
+	}
+
+	ret = fsync(fd1);
+	if (ret < 0) {
+		printf("First fsync on fd1 failed: %m\n");
+		return 1;
+	}
+
+	/* enough for path + dmerror command string  (and then some) */
+	cmdsize = strlen(dmerror_path) + 64;
+
+	cmdbuf = malloc(cmdsize);
+	if (!cmdbuf) {
+		printf("malloc failed: %m\n");
+		return 1;
+	}
+
+	ret = snprintf(cmdbuf, cmdsize, "%s load_error_table", dmerror_path);
+	if (ret < 0 || ret >= cmdsize) {
+		printf("sprintf failure: %d\n", ret);
+		return 1;
+	}
+
+	/* flip the device to non-working mode */
+	ret = system(cmdbuf);
+	if (ret) {
+		if (WIFEXITED(ret))
+			printf("system: program exited: %d\n",
+					WEXITSTATUS(ret));
+		else
+			printf("system: 0x%x\n", (int)ret);
+
+		return 1;
+	}
+
+	ret = pwrite(fd1, buf, bufsize, 0);
+	if (ret < 0) {
+		printf("Second write on fd1 failed: %m\n");
+		return 1;
+	}
+
+	/* Ensure writeback occurs, but don't scrape the error */
+	sync();
+
+	/* flip the device to working mode */
+	ret = snprintf(cmdbuf, cmdsize, "%s load_working_table", dmerror_path);
+	if (ret < 0 || ret >= cmdsize) {
+		printf("sprintf failure: %d\n", ret);
+		return 1;
+	}
+
+	ret = system(cmdbuf);
+	if (ret) {
+		if (WIFEXITED(ret))
+			printf("system: program exited: %d\n",
+					WEXITSTATUS(ret));
+		else
+			printf("system: 0x%x\n", (int)ret);
+
+		return 1;
+	}
+
+
+	fd2 = open(fname, O_WRONLY, 0644);
+	if (fd2 < 0) {
+		printf("Open of fd2 failed: %m\n");
+		return 1;
+	}
+
+	/* We now expect an error */
+	ret = fsync(fd2);
+	if (ret >= 0) {
+		printf("Success on fsync on fd2!\n");
+		return 1;
+	}
+
+	printf("Test passed!\n");
+	return 0;
+}
diff --git a/tests/generic/999 b/tests/generic/999
new file mode 100755
index 000000000000..c46ac4bf3517
--- /dev/null
+++ b/tests/generic/999
@@ -0,0 +1,95 @@
+#! /bin/bash
+# FS QA Test No. XXX
+#
+# Open a file, write to it and fsync it. Change the device to start throwing
+# errors, rewrite the data and call sync so that writeback fails without the
+# error being reported on any fd. Heal the device, then open the file again
+# and verify that fsync on the new fd reports the earlier writeback error.
+#
+#-----------------------------------------------------------------------
+# Copyright (c) 2018, Jeff Layton <jlayton@redhat.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#-----------------------------------------------------------------------
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1    # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+	cd /
+	rm -rf $tmp.* $testdir
+	_dmerror_cleanup
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/dmerror
+
+# real QA test starts here
+_supported_os Linux
+_require_scratch
+# This test uses "dm" without taking into account the data could be on
+# realtime subvolume, thus the test will fail with rtinherit=1
+_require_no_rtinherit
+
+# Generally, we want to avoid journal errors on the extended testcase. Only
+# unset the -s flag if we have a logdev
+# case $FSTYP in
+# 	btrfs)
+# 		_notrun "btrfs has a specialized test for this"
+# 		;;
+# 	ext3|ext4|gfs2|xfs)
+# 		# Do the more thorough test if we have a logdev
+# 		_has_logdev && sflag=''
+# 		;;
+# 	*)
+# 		;;
+# esac
+
+_require_dm_target error
+_require_test_program fsync-open-after-err
+_require_test_program dmerror
+
+rm -f $seqres.full
+
+echo "Format and mount"
+_scratch_mkfs > $seqres.full 2>&1
+_dmerror_init
+_dmerror_mount
+
+_require_fs_space $SCRATCH_MNT 65536
+
+testfile=$SCRATCH_MNT/fsync-open-after-err
+
+echo "$here/src/fsync-open-after-err -d $here/src/dmerror $testfile" >> $seqres.full
+$here/src/fsync-open-after-err -d $here/src/dmerror $testfile
+
+# success, all done
+_dmerror_load_working_table
+_dmerror_unmount
+_dmerror_cleanup
+
+# fs may be corrupt after this -- attempt to repair it
+_repair_scratch_fs >> $seqres.full
+
+status=0
+exit
diff --git a/tests/generic/999.out b/tests/generic/999.out
new file mode 100644
index 000000000000..2e48492ff6d1
--- /dev/null
+++ b/tests/generic/999.out
@@ -0,0 +1,3 @@
+QA output created by 999
+Format and mount
+Test passed!
diff --git a/tests/generic/group b/tests/generic/group
index ea8e51b35e79..48f491a5c32b 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -486,3 +486,4 @@
 481 auto quick log metadata
 482 auto metadata replay
 483 auto quick log metadata
+999 auto quick
-- 
2.14.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] generic: test for seeing unseen fsync errors on newly open files
  2018-04-27 16:38 [PATCH] generic: test for seeing unseen fsync errors on newly open files Jeff Layton
@ 2018-04-27 16:58 ` Andres Freund
  2018-04-27 17:20   ` Jeff Layton
  2018-04-28  7:27 ` Amir Goldstein
  2018-04-28 14:59 ` [PATCH v2] " Jeff Layton
  2 siblings, 1 reply; 10+ messages in thread
From: Andres Freund @ 2018-04-27 16:58 UTC (permalink / raw)
  To: Jeff Layton; +Cc: fstests, eguan, willy, david

Hi,

On 2018-04-27 12:38:33 -0400, Jeff Layton wrote:
> This is motivated by some rather odd behavior done by the PostgreSQL
> project. The main database writers will offload the fsync calls to a
> separate process, which can open files after a writeback error has
> already occurred.

;)


> +	ret = pwrite(fd1, buf, bufsize, 0);
> +	if (ret < 0) {
> +		printf("Second write on fd1 failed: %m\n");
> +		return 1;
> +	}
> +
> +	/* Ensure writeback occurs, but don't scrape the error */
> +	sync();

It might be a good idea to also add a second version of this that
additionally evicts inodes after the sync? I think that should be
simulatable with "echo 2 > /proc/sys/vm/drop_caches" or such? That'd
obviously fail for now...

Thanks for the test,

Andres Freund

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] generic: test for seeing unseen fsync errors on newly open files
  2018-04-27 16:58 ` Andres Freund
@ 2018-04-27 17:20   ` Jeff Layton
  0 siblings, 0 replies; 10+ messages in thread
From: Jeff Layton @ 2018-04-27 17:20 UTC (permalink / raw)
  To: Andres Freund; +Cc: fstests, eguan, willy, david

On Fri, 2018-04-27 at 09:58 -0700, Andres Freund wrote:
> Hi,
> 
> On 2018-04-27 12:38:33 -0400, Jeff Layton wrote:
> > This is motivated by some rather odd behavior done by the PostgreSQL
> > project. The main database writers will offload the fsync calls to a
> > separate process, which can open files after a writeback error has
> > already occurred.
> 
> ;)
> 
> 
> > +	ret = pwrite(fd1, buf, bufsize, 0);
> > +	if (ret < 0) {
> > +		printf("Second write on fd1 failed: %m\n");
> > +		return 1;
> > +	}
> > +
> > +	/* Ensure writeback occurs, but don't scrape the error */
> > +	sync();
> 
> It might be a good idea to also add a second version of this that
> additionally evicts inodes after the sync? I think that should be
> simulatable with "echo 2 > /proc/sys/vm/drop_caches" or such? That'd
> obviously fail for now...
> 

We could. In this test, I'm explicitly holding fd1 open while we open
fd2 after the sync, to ensure that the inode sticks around in the cache.

TBH: I'm a little leery of keeping inodes with errors on them in the
cache as it seems like an open-ended commitment. We may not be able to
keep them forever and may eventually have to eliminate some in order to
make forward progress.

If we do go that route, we may very well need drop_caches to purge
those inodes as well, to give admins a way to clear such inodes
globally without having to explicitly open+fsync them.

In any case, I'd prefer to wait until we at least have a proposed patch
for that piece before we go adding tests for it.

> Thanks for the test,
> 
> Andres Freund
> --
> To unsubscribe from this list: send the line "unsubscribe fstests" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] generic: test for seeing unseen fsync errors on newly open files
  2018-04-27 16:38 [PATCH] generic: test for seeing unseen fsync errors on newly open files Jeff Layton
  2018-04-27 16:58 ` Andres Freund
@ 2018-04-28  7:27 ` Amir Goldstein
  2018-04-28 12:05   ` Jeff Layton
  2018-04-28 14:59 ` [PATCH v2] " Jeff Layton
  2 siblings, 1 reply; 10+ messages in thread
From: Amir Goldstein @ 2018-04-28  7:27 UTC (permalink / raw)
  To: Jeff Layton; +Cc: fstests, Eryu Guan, willy, andres, Dave Chinner

On Fri, Apr 27, 2018 at 9:38 AM, Jeff Layton <jlayton@kernel.org> wrote:
> From: Jeff Layton <jlayton@redhat.com>
>
> This adds a regression test for the following kernel patch:
>
>     errseq: Always report a writeback error once
>
> This is motivated by some rather odd behavior done by the PostgreSQL
> project. The main database writers will offload the fsync calls to a
> separate process, which can open files after a writeback error has
> already occurred.
>
> This used to work with older kernels that reported the error to only
> one fd, but with the errseq_t changes we lost the ability to see
> errors that occurred before the open. The above patch restores that
> behavior.
>
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
>
> This patch currently fails on mainline kernels, but I'll be sending
> a pull request to Linus in the near future for the above patch.
>
>  src/Makefile               |   2 +-
>  src/fsync-open-after-err.c | 167 +++++++++++++++++++++++++++++++++++++++++++++
>  tests/generic/999          |  95 ++++++++++++++++++++++++++
>  tests/generic/999.out      |   3 +
>  tests/generic/group        |   1 +
>  5 files changed, 267 insertions(+), 1 deletion(-)
>  create mode 100644 src/fsync-open-after-err.c
>  create mode 100755 tests/generic/999
>  create mode 100644 tests/generic/999.out
>
> diff --git a/src/Makefile b/src/Makefile
> index 0d3feae1eeb2..3dc9b0da9c3a 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -15,7 +15,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
>         holetest t_truncate_self t_mmap_dio af_unix t_mmap_stale_pmd \
>         t_mmap_cow_race t_mmap_fallocate fsync-err t_mmap_write_ro \
>         t_ext4_dax_journal_corruption t_ext4_dax_inline_corruption \
> -       t_ofd_locks
> +       t_ofd_locks fsync-open-after-err
>
>  LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
>         preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
> diff --git a/src/fsync-open-after-err.c b/src/fsync-open-after-err.c
> new file mode 100644
> index 000000000000..3dcf936eb94a
> --- /dev/null
> +++ b/src/fsync-open-after-err.c

Jeff,

It is an anti-pattern for xfstests to add a single-purpose C program
for things that could be implemented otherwise.

AFAICT, this program doesn't do anything that you cannot do with
existing bash helpers and existing programs.

So either add a flag to fsync-err to enable the new test
or use xfs_io fsync (make sure it really returns the error) and
keep file open with bash tricks.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] generic: test for seeing unseen fsync errors on newly open files
  2018-04-28  7:27 ` Amir Goldstein
@ 2018-04-28 12:05   ` Jeff Layton
  0 siblings, 0 replies; 10+ messages in thread
From: Jeff Layton @ 2018-04-28 12:05 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: fstests, Eryu Guan, willy, andres, Dave Chinner

On Sat, 2018-04-28 at 00:27 -0700, Amir Goldstein wrote:
> On Fri, Apr 27, 2018 at 9:38 AM, Jeff Layton <jlayton@kernel.org> wrote:
> > From: Jeff Layton <jlayton@redhat.com>
> > 
> > This adds a regression test for the following kernel patch:
> > 
> >     errseq: Always report a writeback error once
> > 
> > This is motivated by some rather odd behavior done by the PostgreSQL
> > project. The main database writers will offload the fsync calls to a
> > separate process, which can open files after a writeback error has
> > already occurred.
> > 
> > This used to work with older kernels that reported the error to only
> > one fd, but with the errseq_t changes we lost the ability to see
> > errors that occurred before the open. The above patch restores that
> > behavior.
> > 
> > Signed-off-by: Jeff Layton <jlayton@redhat.com>
> > ---
> > 
> > This patch currently fails on mainline kernels, but I'll be sending
> > a pull request to Linus in the near future for the above patch.
> > 
> >  src/Makefile               |   2 +-
> >  src/fsync-open-after-err.c | 167 +++++++++++++++++++++++++++++++++++++++++++++
> >  tests/generic/999          |  95 ++++++++++++++++++++++++++
> >  tests/generic/999.out      |   3 +
> >  tests/generic/group        |   1 +
> >  5 files changed, 267 insertions(+), 1 deletion(-)
> >  create mode 100644 src/fsync-open-after-err.c
> >  create mode 100755 tests/generic/999
> >  create mode 100644 tests/generic/999.out
> > 
> > diff --git a/src/Makefile b/src/Makefile
> > index 0d3feae1eeb2..3dc9b0da9c3a 100644
> > --- a/src/Makefile
> > +++ b/src/Makefile
> > @@ -15,7 +15,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
> >         holetest t_truncate_self t_mmap_dio af_unix t_mmap_stale_pmd \
> >         t_mmap_cow_race t_mmap_fallocate fsync-err t_mmap_write_ro \
> >         t_ext4_dax_journal_corruption t_ext4_dax_inline_corruption \
> > -       t_ofd_locks
> > +       t_ofd_locks fsync-open-after-err
> > 
> >  LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
> >         preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
> > diff --git a/src/fsync-open-after-err.c b/src/fsync-open-after-err.c
> > new file mode 100644
> > index 000000000000..3dcf936eb94a
> > --- /dev/null
> > +++ b/src/fsync-open-after-err.c
> 
> Jeff,
> 
> It is an anti pattern for xfstests to add a single purpose C program
> for things that could be implemented otherwise.
> 
> AFAICT, This program doesn't do anything that you cannot do with
> existing bash helpers and existing programs.
> 
> So either add a flag to fsync-err to enable the new test
> or use xfs_io fsync (make sure it really returns the error) and
> keep file open with bash tricks.
> 
> Thanks,
> Amir.

Ok. Let's drop this patch for now and I'll see if I can code it up with
scripts somehow.

Thanks,
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2] generic: test for seeing unseen fsync errors on newly open files
  2018-04-27 16:38 [PATCH] generic: test for seeing unseen fsync errors on newly open files Jeff Layton
  2018-04-27 16:58 ` Andres Freund
  2018-04-28  7:27 ` Amir Goldstein
@ 2018-04-28 14:59 ` Jeff Layton
  2018-04-28 15:19   ` Amir Goldstein
  2018-04-28 23:06   ` [PATCH v3] " Jeff Layton
  2 siblings, 2 replies; 10+ messages in thread
From: Jeff Layton @ 2018-04-28 14:59 UTC (permalink / raw)
  To: fstests, eguan; +Cc: willy, andres, david, amir73il

From: Jeff Layton <jlayton@redhat.com>

This adds a regression test for the following kernel patch:

    errseq: Always report a writeback error once

This is motivated by some rather odd behavior done by the PostgreSQL
project. The main database writers will offload the fsync calls to a
separate process, which can open files after a writeback error has
already occurred.

This used to work with older kernels that reported the error to only
one fd, but with the errseq_t changes we lost the ability to see
errors that occurred before the open. The above patch restores that
behavior.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 tests/generic/999     | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/generic/999.out |  5 +++
 tests/generic/group   |  1 +
 3 files changed, 105 insertions(+)
 create mode 100755 tests/generic/999
 create mode 100644 tests/generic/999.out

diff --git a/tests/generic/999 b/tests/generic/999
new file mode 100755
index 000000000000..081409f16100
--- /dev/null
+++ b/tests/generic/999
@@ -0,0 +1,99 @@
+#! /bin/bash
+# FS QA Test No. XXX
+#
+# Open a file, write to it and fsync it. Change the device to start throwing
+# errors, rewrite the data and call sync so that writeback fails without the
+# error being reported on any fd. Heal the device, then open the file again
+# and verify that fsync on the new fd reports the earlier writeback error.
+#
+#-----------------------------------------------------------------------
+# Copyright (c) 2018, Jeff Layton <jlayton@redhat.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#-----------------------------------------------------------------------
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1    # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+	cd /
+	rm -rf $tmp.* $testdir
+	_dmerror_cleanup
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/dmerror
+
+# real QA test starts here
+_supported_os Linux
+_require_scratch
+# This test uses "dm" without taking into account the data could be on
+# realtime subvolume, thus the test will fail with rtinherit=1
+_require_no_rtinherit
+
+_require_dm_target error
+_require_test_program fsync-open-after-err
+_require_test_program dmerror
+
+rm -f $seqres.full
+
+echo "Format and mount"
+_scratch_mkfs > $seqres.full 2>&1
+_dmerror_init
+_dmerror_mount
+
+datalen=65536
+_require_fs_space $SCRATCH_MNT $datalen
+
+# use fd 5 to hold file open
+testfile=$SCRATCH_MNT/fsync-open-after-err
+exec 5>$testfile
+
+# write some data to file and fsync it out
+$XFS_IO_PROG -c "pwrite -q 0 $datalen" -c fsync $testfile
+
+# flip device to non-working mode
+_dmerror_load_error_table
+
+# rewrite the data, call sync to ensure it's written back w/o scraping error
+$XFS_IO_PROG -c "pwrite -q 0 $datalen" -c sync $testfile
+
+# heal the device error
+_dmerror_load_working_table
+
+# open again and call fsync
+echo "The following fsync should fail with EIO:"
+$XFS_IO_PROG -c fsync $testfile
+echo "done"
+
+# close file
+exec 5>&-
+
+# success, all done
+_dmerror_unmount
+_dmerror_cleanup
+
+# fs may be corrupt after this -- attempt to repair it
+_repair_scratch_fs >> $seqres.full
+status=0
+exit
diff --git a/tests/generic/999.out b/tests/generic/999.out
new file mode 100644
index 000000000000..38d2d7f6495f
--- /dev/null
+++ b/tests/generic/999.out
@@ -0,0 +1,5 @@
+QA output created by 999
+Format and mount
+The following fsync should fail with EIO:
+fsync: Input/output error
+done
diff --git a/tests/generic/group b/tests/generic/group
index ea8e51b35e79..48f491a5c32b 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -486,3 +486,4 @@
 481 auto quick log metadata
 482 auto metadata replay
 483 auto quick log metadata
+999 auto quick
-- 
2.14.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] generic: test for seeing unseen fsync errors on newly open files
  2018-04-28 14:59 ` [PATCH v2] " Jeff Layton
@ 2018-04-28 15:19   ` Amir Goldstein
  2018-04-28 23:06   ` [PATCH v3] " Jeff Layton
  1 sibling, 0 replies; 10+ messages in thread
From: Amir Goldstein @ 2018-04-28 15:19 UTC (permalink / raw)
  To: Jeff Layton; +Cc: fstests, Eryu Guan, willy, andres, Dave Chinner

On Sat, Apr 28, 2018 at 7:59 AM, Jeff Layton <jlayton@kernel.org> wrote:
> From: Jeff Layton <jlayton@redhat.com>
>
> This adds a regression test for the following kernel patch:
>
>     errseq: Always report a writeback error once
>
> This is motivated by some rather odd behavior done by the PostgreSQL
> project. The main database writers will offload the fsync calls to a
> separate process, which can open files after a writeback error has
> already occurred.
>
> This used to work with older kernels that reported the error to only
> one fd, but with the errseq_t changes we lost the ability to see
> errors that occurred before the open. The above patch restores that
> behavior.
>
> Signed-off-by: Jeff Layton <jlayton@redhat.com>

Looks good. Minus a few nits, you can add:

Reviewed-by: Amir Goldstein <amir73il@gmail.com>


> ---
>  tests/generic/999     | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  tests/generic/999.out |  5 +++
>  tests/generic/group   |  1 +
>  3 files changed, 105 insertions(+)
>  create mode 100755 tests/generic/999
>  create mode 100644 tests/generic/999.out
>
> diff --git a/tests/generic/999 b/tests/generic/999
> new file mode 100755
> index 000000000000..081409f16100
> --- /dev/null
> +++ b/tests/generic/999
> @@ -0,0 +1,99 @@
> +#! /bin/bash
> +# FS QA Test No. XXX
> +#
> +# Open a file several times, write to it, fsync on all fds and make sure that
> +# they all return 0. Change the device to start throwing errors. Write again
> +# on all fds and fsync on all fds. Ensure that we get errors on all of them.
> +# Then fsync on all one last time and verify that all return 0.
> +#
> +#-----------------------------------------------------------------------
> +# Copyright (c) 2018, Jeff Layton <jlayton@redhat.com>
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it would be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write the Free Software Foundation,
> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> +#-----------------------------------------------------------------------
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1    # failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +       cd /
> +       rm -rf $tmp.* $testdir
> +       _dmerror_cleanup
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +. ./common/dmerror
> +
> +# real QA test starts here
> +_supported_os Linux
> +_require_scratch

Better _require_scratch_nocheck if you expect fs errors.

> +# This test uses "dm" without taking into account the data could be on
> +# realtime subvolume, thus the test will fail with rtinherit=1
> +_require_no_rtinherit
> +
> +_require_dm_target error
> +_require_test_program fsync-open-after-err
> +_require_test_program dmerror

Does it really require those programs?
I think you managed without them..

> +
> +rm -f $seqres.full
> +
> +echo "Format and mount"
> +_scratch_mkfs > $seqres.full 2>&1
> +_dmerror_init
> +_dmerror_mount
> +
> +datalen=65536
> +_require_fs_space $SCRATCH_MNT $datalen
> +
> +# use fd 5 to hold file open
> +testfile=$SCRATCH_MNT/fsync-open-after-err
> +exec 5>$testfile
> +
> +# write some data to file and fsync it out
> +$XFS_IO_PROG -c "pwrite -q 0 $datalen" -c fsync $testfile
> +
> +# flip device to non-working mode
> +_dmerror_load_error_table
> +
> +# rewrite the data, call sync to ensure it's written back w/o scraping error
> +$XFS_IO_PROG -c "pwrite -q 0 $datalen" -c sync $testfile
> +
> +# heal the device error
> +_dmerror_load_working_table
> +
> +# open again and call fsync
> +echo "The following fsync should fail with EIO:"
> +$XFS_IO_PROG -c fsync $testfile
> +echo "done"
> +
> +# close file
> +exec 5>&-
> +
> +# success, all done
> +_dmerror_unmount
> +_dmerror_cleanup
> +
> +# fs may be corrupt after this -- attempt to repair it
> +_repair_scratch_fs >> $seqres.full

Do we need that? Doesn't seem relevant to the test??

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v3] generic: test for seeing unseen fsync errors on newly open files
  2018-04-28 14:59 ` [PATCH v2] " Jeff Layton
  2018-04-28 15:19   ` Amir Goldstein
@ 2018-04-28 23:06   ` Jeff Layton
  2018-05-02  5:50     ` Eryu Guan
  1 sibling, 1 reply; 10+ messages in thread
From: Jeff Layton @ 2018-04-28 23:06 UTC (permalink / raw)
  To: fstests, eguan; +Cc: willy, andres, david, amir73il

From: Jeff Layton <jlayton@redhat.com>

This adds a regression test for the following kernel patch:

    errseq: Always report a writeback error once

This is motivated by some rather odd behavior done by the PostgreSQL
project. The main database writers will offload the fsync calls to a
separate process, which can open files after a writeback error has
already occurred.

This used to work with older kernels that reported the error to only
one fd, but with the errseq_t changes we lost the ability to see
errors that occurred before the open. The above patch restores that
behavior.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
---
 tests/generic/999     | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/generic/999.out |  5 +++
 tests/generic/group   |  1 +
 3 files changed, 101 insertions(+)
 create mode 100755 tests/generic/999
 create mode 100644 tests/generic/999.out

diff --git a/tests/generic/999 b/tests/generic/999
new file mode 100755
index 000000000000..b816fd3bb423
--- /dev/null
+++ b/tests/generic/999
@@ -0,0 +1,95 @@
+#! /bin/bash
+# FS QA Test No. 999
+#
+# Hold a file open, write to it and fsync. Switch the device to start throwing
+# errors, rewrite the data and call sync so that writeback fails without any
+# fd reporting the error. Heal the device, then open the file again and fsync
+# it: the newly opened fd should still see the unseen error (EIO).
+#
+#-----------------------------------------------------------------------
+# Copyright (c) 2018, Jeff Layton <jlayton@redhat.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#-----------------------------------------------------------------------
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1    # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+	cd /
+	rm -rf $tmp.* $testdir
+	_dmerror_cleanup
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/dmerror
+
+# real QA test starts here
+_supported_os Linux
+_require_scratch_nocheck
+# This test uses "dm" without taking into account the data could be on
+# realtime subvolume, thus the test will fail with rtinherit=1
+_require_no_rtinherit
+
+_require_dm_target error
+
+rm -f $seqres.full
+
+echo "Format and mount"
+_scratch_mkfs > $seqres.full 2>&1
+_dmerror_init
+_dmerror_mount
+
+datalen=65536
+_require_fs_space $SCRATCH_MNT $datalen
+
+# use fd 5 to hold file open
+testfile=$SCRATCH_MNT/fsync-open-after-err
+exec 5>$testfile
+
+# write some data to file and fsync it out
+$XFS_IO_PROG -c "pwrite -q 0 $datalen" -c fsync $testfile
+
+# flip device to non-working mode
+_dmerror_load_error_table
+
+# rewrite the data, call sync to force writeback w/o any fd seeing the error
+$XFS_IO_PROG -c "pwrite -q 0 $datalen" -c sync $testfile
+
+# heal the device error
+_dmerror_load_working_table
+
+# open again and call fsync
+echo "The following fsync should fail with EIO:"
+$XFS_IO_PROG -c fsync $testfile
+echo "done"
+
+# close file
+exec 5>&-
+
+# success, all done
+_dmerror_unmount
+_dmerror_cleanup
+
+status=0
+exit
diff --git a/tests/generic/999.out b/tests/generic/999.out
new file mode 100644
index 000000000000..38d2d7f6495f
--- /dev/null
+++ b/tests/generic/999.out
@@ -0,0 +1,5 @@
+QA output created by 999
+Format and mount
+The following fsync should fail with EIO:
+fsync: Input/output error
+done
diff --git a/tests/generic/group b/tests/generic/group
index ea8e51b35e79..48f491a5c32b 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -486,3 +486,4 @@
 481 auto quick log metadata
 482 auto metadata replay
 483 auto quick log metadata
+999 auto quick
-- 
2.14.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] generic: test for seeing unseen fsync errors on newly open files
  2018-04-28 23:06   ` [PATCH v3] " Jeff Layton
@ 2018-05-02  5:50     ` Eryu Guan
  2018-05-08 12:46       ` Jeff Layton
  0 siblings, 1 reply; 10+ messages in thread
From: Eryu Guan @ 2018-05-02  5:50 UTC (permalink / raw)
  To: Jeff Layton; +Cc: fstests, willy, andres, david, amir73il

On Sat, Apr 28, 2018 at 07:06:27PM -0400, Jeff Layton wrote:
> From: Jeff Layton <jlayton@redhat.com>
> 
> This adds a regression test for the following kernel patch:
> 
>     errseq: Always report a writeback error once
> 
> This is motivated by some rather odd behavior done by the PostgreSQL
> project. The main database writers will offload the fsync calls to a
> separate process, which can open files after a writeback error has
> already occurred.
> 
> This used to work with older kernels that reported the error to only
> one fd, but with the errseq_t changes we lost the ability to see
> errors that occurred before the open. The above patch restores that
> behavior.
> 
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  tests/generic/999     | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  tests/generic/999.out |  5 +++
>  tests/generic/group   |  1 +
>  3 files changed, 101 insertions(+)
>  create mode 100755 tests/generic/999
>  create mode 100644 tests/generic/999.out
> 
> diff --git a/tests/generic/999 b/tests/generic/999
> new file mode 100755
> index 000000000000..b816fd3bb423
> --- /dev/null
> +++ b/tests/generic/999
> @@ -0,0 +1,95 @@
> +#! /bin/bash
> +# FS QA Test No. XXX
                    ^^^ Need a test number here :)
> +#
> +# Open a file several times, write to it, fsync on all fds and make sure that
> +# they all return 0. Change the device to start throwing errors. Write again
> +# on all fds and fsync on all fds. Ensure that we get errors on all of them.
> +# Then fsync on all one last time and verify that all return 0.
> +#
> +#-----------------------------------------------------------------------
> +# Copyright (c) 2018, Jeff Layton <jlayton@redhat.com>
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it would be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write the Free Software Foundation,
> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> +#-----------------------------------------------------------------------
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1    # failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	rm -rf $tmp.* $testdir
> +	_dmerror_cleanup
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +. ./common/dmerror
> +
> +# real QA test starts here
> +_supported_os Linux
> +_require_scratch_nocheck
> +# This test uses "dm" without taking into account the data could be on
> +# realtime subvolume, thus the test will fail with rtinherit=1
> +_require_no_rtinherit
> +
> +_require_dm_target error
> +
> +rm -f $seqres.full
> +
> +echo "Format and mount"
> +_scratch_mkfs > $seqres.full 2>&1
> +_dmerror_init
> +_dmerror_mount
> +
> +datalen=65536
> +_require_fs_space $SCRATCH_MNT $datalen
> +
> +# use fd 5 to hold file open
> +testfile=$SCRATCH_MNT/fsync-open-after-err
> +exec 5>$testfile
> +
> +# write some data to file and fsync it out
> +$XFS_IO_PROG -c "pwrite -q 0 $datalen" -c fsync $testfile
> +
> +# flip device to non-working mode
> +_dmerror_load_error_table
> +
> +# rewrite the data, call sync to ensure it's written back w/o scraping error
> +$XFS_IO_PROG -c "pwrite -q 0 $datalen" -c sync $testfile
> +
> +# heal the device error
> +_dmerror_load_working_table
> +
> +# open again and call fsync
> +echo "The following fsync should fail with EIO:"
> +$XFS_IO_PROG -c fsync $testfile
> +echo "done"
                                                                                                                                                                                               
I built latest Linus tree, which should contain the mentioned fix, and I                                                                                                                       
saw different results on xfs, ext4 and btrfs.                                                                                                                                                  
                                                                                                                                                                                               
XFS fails the test as:                                                                                                                                                                         
    -fsync: Input/output error                                                                                                                                                                 
    +/mnt/scratch/fsync-open-after-err: Input/output error                                                                                                                                     
                                                                                                                                                                                               
While btrfs fails as:                                                                                                                                                                          
    -fsync: Input/output error                                                                                                                                                                 
    +/mnt/scratch/fsync-open-after-err: Read-only file system                                                                                                                                  
                                                                                                                                                                                               
And both XFS and btrfs behave in the same way whether or not I have the
fix applied (tested on v4.16 kernel and v4.17-rc3+ kernel).
                                                                                                                                                                                               
Only ext4 passes the test with v4.17-rc3+ kernel, and fails on v4.16                                                                                                                           
kernel as expected:                                                                                                                                                                            
    -fsync: Input/output error                                                                                                                                                                 
                                                                                                                                                                                               
Could you please take a look?                                                                                                                                                                  
                                                                                                                                                                                               
Thanks,                                                                                                                                                                                        
Eryu

> +
> +# close file
> +exec 5>&-
> +
> +# success, all done
> +_dmerror_unmount
> +_dmerror_cleanup
> +
> +status=0
> +exit
> diff --git a/tests/generic/999.out b/tests/generic/999.out
> new file mode 100644
> index 000000000000..38d2d7f6495f
> --- /dev/null
> +++ b/tests/generic/999.out
> @@ -0,0 +1,5 @@
> +QA output created by 999
> +Format and mount
> +The following fsync should fail with EIO:
> +fsync: Input/output error
> +done
> diff --git a/tests/generic/group b/tests/generic/group
> index ea8e51b35e79..48f491a5c32b 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -486,3 +486,4 @@
>  481 auto quick log metadata
>  482 auto metadata replay
>  483 auto quick log metadata
> +999 auto quick
> -- 
> 2.14.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe fstests" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] generic: test for seeing unseen fsync errors on newly open files
  2018-05-02  5:50     ` Eryu Guan
@ 2018-05-08 12:46       ` Jeff Layton
  0 siblings, 0 replies; 10+ messages in thread
From: Jeff Layton @ 2018-05-08 12:46 UTC (permalink / raw)
  To: Eryu Guan; +Cc: fstests, willy, andres, david, amir73il

On Wed, 2018-05-02 at 13:50 +0800, Eryu Guan wrote:
> On Sat, Apr 28, 2018 at 07:06:27PM -0400, Jeff Layton wrote:
> > From: Jeff Layton <jlayton@redhat.com>
> > 
> > This adds a regression test for the following kernel patch:
> > 
> >     errseq: Always report a writeback error once
> > 
> > This is motivated by some rather odd behavior done by the PostgreSQL
> > project. The main database writers will offload the fsync calls to a
> > separate process, which can open files after a writeback error has
> > already occurred.
> > 
> > This used to work with older kernels that reported the error to only
> > one fd, but with the errseq_t changes we lost the ability to see
> > errors that occurred before the open. The above patch restores that
> > behavior.
> > 
> > Signed-off-by: Jeff Layton <jlayton@redhat.com>
> > Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> >  tests/generic/999     | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  tests/generic/999.out |  5 +++
> >  tests/generic/group   |  1 +
> >  3 files changed, 101 insertions(+)
> >  create mode 100755 tests/generic/999
> >  create mode 100644 tests/generic/999.out
> > 
> > diff --git a/tests/generic/999 b/tests/generic/999
> > new file mode 100755
> > index 000000000000..b816fd3bb423
> > --- /dev/null
> > +++ b/tests/generic/999
> > @@ -0,0 +1,95 @@
> > +#! /bin/bash
> > +# FS QA Test No. XXX
> 
>                     ^^^ Need a test number here :)
> > +#
> > +# Open a file several times, write to it, fsync on all fds and make sure that
> > +# they all return 0. Change the device to start throwing errors. Write again
> > +# on all fds and fsync on all fds. Ensure that we get errors on all of them.
> > +# Then fsync on all one last time and verify that all return 0.
> > +#
> > +#-----------------------------------------------------------------------
> > +# Copyright (c) 2018, Jeff Layton <jlayton@redhat.com>
> > +#
> > +# This program is free software; you can redistribute it and/or
> > +# modify it under the terms of the GNU General Public License as
> > +# published by the Free Software Foundation.
> > +#
> > +# This program is distributed in the hope that it would be useful,
> > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > +# GNU General Public License for more details.
> > +#
> > +# You should have received a copy of the GNU General Public License
> > +# along with this program; if not, write the Free Software Foundation,
> > +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> > +#-----------------------------------------------------------------------
> > +
> > +seq=`basename $0`
> > +seqres=$RESULT_DIR/$seq
> > +echo "QA output created by $seq"
> > +
> > +here=`pwd`
> > +tmp=/tmp/$$
> > +status=1    # failure is the default!
> > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > +
> > +_cleanup()
> > +{
> > +	cd /
> > +	rm -rf $tmp.* $testdir
> > +	_dmerror_cleanup
> > +}
> > +
> > +# get standard environment, filters and checks
> > +. ./common/rc
> > +. ./common/filter
> > +. ./common/dmerror
> > +
> > +# real QA test starts here
> > +_supported_os Linux
> > +_require_scratch_nocheck
> > +# This test uses "dm" without taking into account the data could be on
> > +# realtime subvolume, thus the test will fail with rtinherit=1
> > +_require_no_rtinherit
> > +
> > +_require_dm_target error
> > +
> > +rm -f $seqres.full
> > +
> > +echo "Format and mount"
> > +_scratch_mkfs > $seqres.full 2>&1
> > +_dmerror_init
> > +_dmerror_mount
> > +
> > +datalen=65536
> > +_require_fs_space $SCRATCH_MNT $datalen
> > +
> > +# use fd 5 to hold file open
> > +testfile=$SCRATCH_MNT/fsync-open-after-err
> > +exec 5>$testfile
> > +
> > +# write some data to file and fsync it out
> > +$XFS_IO_PROG -c "pwrite -q 0 $datalen" -c fsync $testfile
> > +
> > +# flip device to non-working mode
> > +_dmerror_load_error_table
> > +
> > +# rewrite the data, call sync to ensure it's written back w/o scraping error
> > +$XFS_IO_PROG -c "pwrite -q 0 $datalen" -c sync $testfile
> > +
> > +# heal the device error
> > +_dmerror_load_working_table
> > +
> > +# open again and call fsync
> > +echo "The following fsync should fail with EIO:"
> > +$XFS_IO_PROG -c fsync $testfile
> > +echo "done"
> 
>                                                                                                                                                                                                
> I built latest Linus tree, which should contain the mentioned fix, and I                                                                                                                       
> saw different results on xfs, ext4 and btrfs.                                                                                                                                                  
>                                                                                                                                                                                                
> XFS fails the test as:                                                                                                                                                                         
>     -fsync: Input/output error                                                                                                                                                                 
>     +/mnt/scratch/fsync-open-after-err: Input/output error

I think I see the problem. I was testing this with a logdev. I think
we'll need a _require_logdev directive in the test. I'll send a v4 in a
bit with that and some other corrections.

>                                                                                                                                      
>                                                                                                                                                                                                
> While btrfs fails as:                                                                                                                                                                          
>     -fsync: Input/output error                                                                                                                                                                 
>     +/mnt/scratch/fsync-open-after-err: Read-only file system
>                                                              

I think btrfs will require a specialized testcase (like we did for some
of the earlier tests in this area). I'll send a separate patch for that
one.

>                                                                      
>                                                                                                                                                                                                
> And both XFS and btrfs behave in the same way no matter I have the fix                                                                                                                         
> applied or not (tested on v4.16 kernel and v4.17-rc3+ kernel).                                                                                                                                 
>                                                                                                                                                                                                
> Only ext4 passes the test with v4.17-rc3+ kernel, and fails on v4.16                                                                                                                           
> kernel as expected:                                                                                                                                                                            
>     -fsync: Input/output error                                                                                                                                                                 
>                                                                                                                                                                                                
> Could you please take a look?                                                                                                                                                                  
>                                                                                                                                                                                                
> Thanks,                                                                                                                                                                                        
> Eryu
> 
> > +
> > +# close file
> > +exec 5>&-
> > +
> > +# success, all done
> > +_dmerror_unmount
> > +_dmerror_cleanup
> > +
> > +status=0
> > +exit
> > diff --git a/tests/generic/999.out b/tests/generic/999.out
> > new file mode 100644
> > index 000000000000..38d2d7f6495f
> > --- /dev/null
> > +++ b/tests/generic/999.out
> > @@ -0,0 +1,5 @@
> > +QA output created by 999
> > +Format and mount
> > +The following fsync should fail with EIO:
> > +fsync: Input/output error
> > +done
> > diff --git a/tests/generic/group b/tests/generic/group
> > index ea8e51b35e79..48f491a5c32b 100644
> > --- a/tests/generic/group
> > +++ b/tests/generic/group
> > @@ -486,3 +486,4 @@
> >  481 auto quick log metadata
> >  482 auto metadata replay
> >  483 auto quick log metadata
> > +999 auto quick
> > -- 
> > 2.14.3
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe fstests" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2018-05-08 12:46 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-27 16:38 [PATCH] generic: test for seeing unseen fsync errors on newly open files Jeff Layton
2018-04-27 16:58 ` Andres Freund
2018-04-27 17:20   ` Jeff Layton
2018-04-28  7:27 ` Amir Goldstein
2018-04-28 12:05   ` Jeff Layton
2018-04-28 14:59 ` [PATCH v2] " Jeff Layton
2018-04-28 15:19   ` Amir Goldstein
2018-04-28 23:06   ` [PATCH v3] " Jeff Layton
2018-05-02  5:50     ` Eryu Guan
2018-05-08 12:46       ` Jeff Layton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.