fstests.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCHSET v4 0/1] fstests: make sure NEEDSREPAIR feature stops mounts
@ 2021-04-14  1:05 Darrick J. Wong
  2021-04-14  1:05 ` [PATCH 1/1] xfs: test that the needsrepair feature works as advertised Darrick J. Wong
  0 siblings, 1 reply; 10+ messages in thread
From: Darrick J. Wong @ 2021-04-14  1:05 UTC (permalink / raw)
  To: djwong, guaneryu; +Cc: linux-xfs, fstests, guan

Hi all,

Quick test to make sure that having the new incompat "needs repair" feature
flag actally prevents mounting, and that xfs_repair can clean up whatever
happened.

v2: fix bash variable error, fix a problem found when using xfs_admin
    with external log devices
v3: Fix a stupid naming bug in v2.
v4: Fix other review comments.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=needsrepair

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=needsrepair

fstests git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfstests-dev.git/log/?h=needsrepair
---
 common/xfs        |   21 ++++++++++++++
 tests/xfs/768     |   80 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/xfs/768.out |    4 +++
 tests/xfs/770     |   82 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/xfs/770.out |    2 +
 tests/xfs/group   |    2 +
 6 files changed, 191 insertions(+)
 create mode 100755 tests/xfs/768
 create mode 100644 tests/xfs/768.out
 create mode 100755 tests/xfs/770
 create mode 100644 tests/xfs/770.out


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/1] xfs: test that the needsrepair feature works as advertised
  2021-04-14  1:05 [PATCHSET v4 0/1] fstests: make sure NEEDSREPAIR feature stops mounts Darrick J. Wong
@ 2021-04-14  1:05 ` Darrick J. Wong
  2021-04-18 12:14   ` Eryu Guan
  0 siblings, 1 reply; 10+ messages in thread
From: Darrick J. Wong @ 2021-04-14  1:05 UTC (permalink / raw)
  To: djwong, guaneryu; +Cc: linux-xfs, fstests, guan

From: Darrick J. Wong <djwong@kernel.org>

Make sure that the needsrepair feature flag can be cleared only by
repair and that mounts are prohibited when the feature is set.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 common/xfs        |   21 ++++++++++++++
 tests/xfs/768     |   80 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/xfs/768.out |    4 +++
 tests/xfs/770     |   82 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/xfs/770.out |    2 +
 tests/xfs/group   |    2 +
 6 files changed, 191 insertions(+)
 create mode 100755 tests/xfs/768
 create mode 100644 tests/xfs/768.out
 create mode 100755 tests/xfs/770
 create mode 100644 tests/xfs/770.out


diff --git a/common/xfs b/common/xfs
index 887bd001..fa204663 100644
--- a/common/xfs
+++ b/common/xfs
@@ -1114,3 +1114,24 @@ _xfs_get_cowgc_interval() {
 		_fail "Can't find cowgc interval procfs knob?"
 	fi
 }
+
+# Print the status of the given features on the scratch filesystem.
+# Returns 0 if all features are found, 1 otherwise.
+_check_scratch_xfs_features()
+{
+	local features="$(_scratch_xfs_db -c 'version')"
+	local output=("FEATURES:")
+	local found=0
+
+	for feature in "$@"; do
+		local status="NO"
+		if echo "${features}" | grep -q -w "${feature}"; then
+			status="YES"
+			found=$((found + 1))
+		fi
+		output+=("${feature}:${status}")
+	done
+
+	echo "${output[@]}"
+	test "${found}" -eq "$#"
+}
diff --git a/tests/xfs/768 b/tests/xfs/768
new file mode 100755
index 00000000..dd9c53be
--- /dev/null
+++ b/tests/xfs/768
@@ -0,0 +1,80 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2021 Oracle.  All Rights Reserved.
+#
+# FS QA Test No. 768
+#
+# Make sure that the kernel won't mount a filesystem if repair forcibly sets
+# NEEDSREPAIR while fixing metadata.  Corrupt a directory in such a way as
+# to force repair to write an invalid dirent value as a sentinel to trigger a
+# repair activity in a later phase.  Use a debug knob in xfs_repair to abort
+# the repair immediately after forcing the flag on.
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1    # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+	cd /
+	rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs xfs
+_require_scratch
+grep -q LIBXFS_DEBUG_WRITE_CRASH $XFS_REPAIR_PROG || \
+		_notrun "libxfs write failure injection hook not detected?"
+
+rm -f $seqres.full
+
+# Set up a real filesystem for our actual test
+_scratch_mkfs -m crc=1 >> $seqres.full
+
+# Create a directory large enough to have a dir data block.  2k worth of
+# dirent names ought to do it.
+_scratch_mount
+mkdir -p $SCRATCH_MNT/fubar
+for i in $(seq 0 256 2048); do
+	fname=$(printf "%0255d" $i)
+	ln -s -f urk $SCRATCH_MNT/fubar/$fname
+done
+inum=$(stat -c '%i' $SCRATCH_MNT/fubar)
+_scratch_unmount
+
+# Fuzz the directory
+_scratch_xfs_db -x -c "inode $inum" -c "dblock 0" \
+	-c "fuzz -d bu[2].inumber add" >> $seqres.full
+
+# Try to repair the directory, force it to crash after setting needsrepair
+LIBXFS_DEBUG_WRITE_CRASH=ddev=2 _scratch_xfs_repair 2>> $seqres.full
+test $? -eq 137 || echo "repair should have been killed??"
+
+# We can't mount, right?
+_check_scratch_xfs_features NEEDSREPAIR
+_try_scratch_mount &> $tmp.mount
+res=$?
+_filter_scratch < $tmp.mount
+if [ $res -eq 0 ]; then
+	echo "Should not be able to mount after needsrepair crash"
+	_scratch_unmount
+fi
+
+# Repair properly this time and retry the mount
+_scratch_xfs_repair 2>> $seqres.full
+_check_scratch_xfs_features NEEDSREPAIR
+
+_scratch_mount
+
+# success, all done
+status=0
+exit
diff --git a/tests/xfs/768.out b/tests/xfs/768.out
new file mode 100644
index 00000000..1168ba25
--- /dev/null
+++ b/tests/xfs/768.out
@@ -0,0 +1,4 @@
+QA output created by 768
+FEATURES: NEEDSREPAIR:YES
+mount: SCRATCH_MNT: mount(2) system call failed: Structure needs cleaning.
+FEATURES: NEEDSREPAIR:NO
diff --git a/tests/xfs/770 b/tests/xfs/770
new file mode 100755
index 00000000..574047d5
--- /dev/null
+++ b/tests/xfs/770
@@ -0,0 +1,82 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2021 Oracle.  All Rights Reserved.
+#
+# FS QA Test No. 770
+#
+# Populate a filesystem with all types of metadata, then run repair with the
+# libxfs write failure trigger set to go after a single write.  Check that the
+# injected error trips, causing repair to abort, that needsrepair is set on the
+# fs, the kernel won't mount; and that a non-injecting repair run clears
+# needsrepair and makes the filesystem mountable again.
+#
+# Repeat with the trip point set to successively higher numbers of writes until
+# we hit ~200 writes or repair manages to run to completion without tripping.
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1    # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+	cd /
+	rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/populate
+. ./common/filter
+
+# real QA test starts here
+_supported_fs xfs
+
+_require_scratch_xfs_crc		# needsrepair only exists for v5
+_require_populate_commands
+
+rm -f ${RESULT_DIR}/require_scratch	# we take care of checking the fs
+rm -f $seqres.full
+
+# Populate the filesystem
+_scratch_populate_cached nofill >> $seqres.full 2>&1
+
+max_writes=200			# 200 loops should be enough for anyone
+nr_incr=$((13 / TIME_FACTOR))
+test $nr_incr -lt 1 && nr_incr=1
+for ((nr_writes = 1; nr_writes < max_writes; nr_writes += nr_incr)); do
+	# Start a repair and force it to abort after some number of writes
+	LIBXFS_DEBUG_WRITE_CRASH=ddev=$nr_writes _scratch_xfs_repair 2>> $seqres.full
+	res=$?
+	if [ $res -ne 0 ] && [ $res -ne 137 ]; then
+		echo "repair failed with $res??"
+		break
+	elif [ $res -eq 0 ]; then
+		[ $nr_writes -eq 1 ] && \
+			echo "ran to completion on the first try?"
+		break
+	fi
+
+	# Check the state of NEEDSREPAIR after repair fails.  If it isn't set
+	# but if repair -n says the fs is clean, then it's possible that the
+	# injected error caused it to abort immediately after the write that
+	# cleared NEEDSREPAIR.
+	if ! _check_scratch_xfs_features NEEDSREPAIR > /dev/null &&
+	   ! _scratch_xfs_repair -n &>> $seqres.full; then
+		echo "NEEDSREPAIR should be set on corrupt fs"
+	fi
+
+	# Repair properly this time and retry the mount
+	_scratch_xfs_repair 2>> $seqres.full
+	_check_scratch_xfs_features NEEDSREPAIR > /dev/null && \
+		echo "Repair failed to clear NEEDSREPAIR on the $nr_writes writes test"
+done
+
+# success, all done
+echo Silence is golden.
+status=0
+exit
diff --git a/tests/xfs/770.out b/tests/xfs/770.out
new file mode 100644
index 00000000..725d740b
--- /dev/null
+++ b/tests/xfs/770.out
@@ -0,0 +1,2 @@
+QA output created by 770
+Silence is golden.
diff --git a/tests/xfs/group b/tests/xfs/group
index fe83f82d..09fddb5a 100644
--- a/tests/xfs/group
+++ b/tests/xfs/group
@@ -520,3 +520,5 @@
 537 auto quick
 538 auto stress
 539 auto quick mount
+768 auto quick repair
+770 auto repair


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/1] xfs: test that the needsrepair feature works as advertised
  2021-04-14  1:05 ` [PATCH 1/1] xfs: test that the needsrepair feature works as advertised Darrick J. Wong
@ 2021-04-18 12:14   ` Eryu Guan
  2021-04-20 18:26     ` Darrick J. Wong
  0 siblings, 1 reply; 10+ messages in thread
From: Eryu Guan @ 2021-04-18 12:14 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: guaneryu, linux-xfs, fstests

On Tue, Apr 13, 2021 at 06:05:36PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Make sure that the needsrepair feature flag can be cleared only by
> repair and that mounts are prohibited when the feature is set.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  common/xfs        |   21 ++++++++++++++
>  tests/xfs/768     |   80 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  tests/xfs/768.out |    4 +++
>  tests/xfs/770     |   82 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  tests/xfs/770.out |    2 +
>  tests/xfs/group   |    2 +
>  6 files changed, 191 insertions(+)
>  create mode 100755 tests/xfs/768
>  create mode 100644 tests/xfs/768.out
>  create mode 100755 tests/xfs/770
>  create mode 100644 tests/xfs/770.out
> 
> 
> diff --git a/common/xfs b/common/xfs
> index 887bd001..fa204663 100644
> --- a/common/xfs
> +++ b/common/xfs
> @@ -1114,3 +1114,24 @@ _xfs_get_cowgc_interval() {
>  		_fail "Can't find cowgc interval procfs knob?"
>  	fi
>  }
> +
> +# Print the status of the given features on the scratch filesystem.
> +# Returns 0 if all features are found, 1 otherwise.
> +_check_scratch_xfs_features()
> +{
> +	local features="$(_scratch_xfs_db -c 'version')"
> +	local output=("FEATURES:")
> +	local found=0
> +
> +	for feature in "$@"; do
> +		local status="NO"
> +		if echo "${features}" | grep -q -w "${feature}"; then
> +			status="YES"
> +			found=$((found + 1))
> +		fi
> +		output+=("${feature}:${status}")
> +	done
> +
> +	echo "${output[@]}"
> +	test "${found}" -eq "$#"
> +}
> diff --git a/tests/xfs/768 b/tests/xfs/768
> new file mode 100755
> index 00000000..dd9c53be
> --- /dev/null
> +++ b/tests/xfs/768
> @@ -0,0 +1,80 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +# Copyright (c) 2021 Oracle.  All Rights Reserved.
> +#
> +# FS QA Test No. 768
> +#
> +# Make sure that the kernel won't mount a filesystem if repair forcibly sets
> +# NEEDSREPAIR while fixing metadata.  Corrupt a directory in such a way as
> +# to force repair to write an invalid dirent value as a sentinel to trigger a
> +# repair activity in a later phase.  Use a debug knob in xfs_repair to abort
> +# the repair immediately after forcing the flag on.
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1    # failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	rm -f $tmp.*
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +
> +# real QA test starts here
> +_supported_fs xfs
> +_require_scratch
> +grep -q LIBXFS_DEBUG_WRITE_CRASH $XFS_REPAIR_PROG || \
> +		_notrun "libxfs write failure injection hook not detected?"

Sorry that I didn't notice it earlier, but this pattern repeats in both
tests, might be possible to turn it into a common helper?

And $XFS_REPAIR_PROG may contain some pre-defined options along with
xfs_repair in the future, like $DF_PROG is actually defined as
"df -T -P". Perhaps $(type -P xfs_repair) is better here.

> +
> +rm -f $seqres.full
> +
> +# Set up a real filesystem for our actual test
> +_scratch_mkfs -m crc=1 >> $seqres.full

Test relies on -m crc=1, so probably we need _require_xfs_crc as well to
make sure userspace and kernel supports crc.

Or just like xfs/770, use _require_scratch_xfs_crc instead of
_require_scratch?

> +
> +# Create a directory large enough to have a dir data block.  2k worth of
> +# dirent names ought to do it.
> +_scratch_mount
> +mkdir -p $SCRATCH_MNT/fubar
> +for i in $(seq 0 256 2048); do
> +	fname=$(printf "%0255d" $i)
> +	ln -s -f urk $SCRATCH_MNT/fubar/$fname
> +done
> +inum=$(stat -c '%i' $SCRATCH_MNT/fubar)
> +_scratch_unmount
> +
> +# Fuzz the directory
> +_scratch_xfs_db -x -c "inode $inum" -c "dblock 0" \
> +	-c "fuzz -d bu[2].inumber add" >> $seqres.full
> +
> +# Try to repair the directory, force it to crash after setting needsrepair
> +LIBXFS_DEBUG_WRITE_CRASH=ddev=2 _scratch_xfs_repair 2>> $seqres.full
> +test $? -eq 137 || echo "repair should have been killed??"
> +
> +# We can't mount, right?
> +_check_scratch_xfs_features NEEDSREPAIR
> +_try_scratch_mount &> $tmp.mount
> +res=$?
> +_filter_scratch < $tmp.mount
> +if [ $res -eq 0 ]; then
> +	echo "Should not be able to mount after needsrepair crash"
> +	_scratch_unmount
> +fi
> +
> +# Repair properly this time and retry the mount
> +_scratch_xfs_repair 2>> $seqres.full
> +_check_scratch_xfs_features NEEDSREPAIR
> +
> +_scratch_mount
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/xfs/768.out b/tests/xfs/768.out
> new file mode 100644
> index 00000000..1168ba25
> --- /dev/null
> +++ b/tests/xfs/768.out
> @@ -0,0 +1,4 @@
> +QA output created by 768
> +FEATURES: NEEDSREPAIR:YES
> +mount: SCRATCH_MNT: mount(2) system call failed: Structure needs cleaning.
> +FEATURES: NEEDSREPAIR:NO
> diff --git a/tests/xfs/770 b/tests/xfs/770
> new file mode 100755
> index 00000000..574047d5
> --- /dev/null
> +++ b/tests/xfs/770
> @@ -0,0 +1,82 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +# Copyright (c) 2021 Oracle.  All Rights Reserved.
> +#
> +# FS QA Test No. 770
> +#
> +# Populate a filesystem with all types of metadata, then run repair with the
> +# libxfs write failure trigger set to go after a single write.  Check that the
> +# injected error trips, causing repair to abort, that needsrepair is set on the
> +# fs, the kernel won't mount; and that a non-injecting repair run clears
> +# needsrepair and makes the filesystem mountable again.
> +#
> +# Repeat with the trip point set to successively higher numbers of writes until
> +# we hit ~200 writes or repair manages to run to completion without tripping.
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1    # failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	rm -f $tmp.*
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/populate
> +. ./common/filter
> +
> +# real QA test starts here
> +_supported_fs xfs
> +
> +_require_scratch_xfs_crc		# needsrepair only exists for v5
> +_require_populate_commands
> +
> +rm -f ${RESULT_DIR}/require_scratch	# we take care of checking the fs

Use _require_scratch_nocheck instead?

Thanks,
Eryu

> +rm -f $seqres.full
> +
> +# Populate the filesystem
> +_scratch_populate_cached nofill >> $seqres.full 2>&1
> +
> +max_writes=200			# 200 loops should be enough for anyone
> +nr_incr=$((13 / TIME_FACTOR))
> +test $nr_incr -lt 1 && nr_incr=1
> +for ((nr_writes = 1; nr_writes < max_writes; nr_writes += nr_incr)); do
> +	# Start a repair and force it to abort after some number of writes
> +	LIBXFS_DEBUG_WRITE_CRASH=ddev=$nr_writes _scratch_xfs_repair 2>> $seqres.full
> +	res=$?
> +	if [ $res -ne 0 ] && [ $res -ne 137 ]; then
> +		echo "repair failed with $res??"
> +		break
> +	elif [ $res -eq 0 ]; then
> +		[ $nr_writes -eq 1 ] && \
> +			echo "ran to completion on the first try?"
> +		break
> +	fi
> +
> +	# Check the state of NEEDSREPAIR after repair fails.  If it isn't set
> +	# but if repair -n says the fs is clean, then it's possible that the
> +	# injected error caused it to abort immediately after the write that
> +	# cleared NEEDSREPAIR.
> +	if ! _check_scratch_xfs_features NEEDSREPAIR > /dev/null &&
> +	   ! _scratch_xfs_repair -n &>> $seqres.full; then
> +		echo "NEEDSREPAIR should be set on corrupt fs"
> +	fi
> +
> +	# Repair properly this time and retry the mount
> +	_scratch_xfs_repair 2>> $seqres.full
> +	_check_scratch_xfs_features NEEDSREPAIR > /dev/null && \
> +		echo "Repair failed to clear NEEDSREPAIR on the $nr_writes writes test"
> +done
> +
> +# success, all done
> +echo Silence is golden.
> +status=0
> +exit
> diff --git a/tests/xfs/770.out b/tests/xfs/770.out
> new file mode 100644
> index 00000000..725d740b
> --- /dev/null
> +++ b/tests/xfs/770.out
> @@ -0,0 +1,2 @@
> +QA output created by 770
> +Silence is golden.
> diff --git a/tests/xfs/group b/tests/xfs/group
> index fe83f82d..09fddb5a 100644
> --- a/tests/xfs/group
> +++ b/tests/xfs/group
> @@ -520,3 +520,5 @@
>  537 auto quick
>  538 auto stress
>  539 auto quick mount
> +768 auto quick repair
> +770 auto repair

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/1] xfs: test that the needsrepair feature works as advertised
  2021-04-18 12:14   ` Eryu Guan
@ 2021-04-20 18:26     ` Darrick J. Wong
  0 siblings, 0 replies; 10+ messages in thread
From: Darrick J. Wong @ 2021-04-20 18:26 UTC (permalink / raw)
  To: Eryu Guan; +Cc: guaneryu, linux-xfs, fstests

On Sun, Apr 18, 2021 at 08:14:58PM +0800, Eryu Guan wrote:
> On Tue, Apr 13, 2021 at 06:05:36PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Make sure that the needsrepair feature flag can be cleared only by
> > repair and that mounts are prohibited when the feature is set.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  common/xfs        |   21 ++++++++++++++
> >  tests/xfs/768     |   80 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  tests/xfs/768.out |    4 +++
> >  tests/xfs/770     |   82 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  tests/xfs/770.out |    2 +
> >  tests/xfs/group   |    2 +
> >  6 files changed, 191 insertions(+)
> >  create mode 100755 tests/xfs/768
> >  create mode 100644 tests/xfs/768.out
> >  create mode 100755 tests/xfs/770
> >  create mode 100644 tests/xfs/770.out
> > 
> > 
> > diff --git a/common/xfs b/common/xfs
> > index 887bd001..fa204663 100644
> > --- a/common/xfs
> > +++ b/common/xfs
> > @@ -1114,3 +1114,24 @@ _xfs_get_cowgc_interval() {
> >  		_fail "Can't find cowgc interval procfs knob?"
> >  	fi
> >  }
> > +
> > +# Print the status of the given features on the scratch filesystem.
> > +# Returns 0 if all features are found, 1 otherwise.
> > +_check_scratch_xfs_features()
> > +{
> > +	local features="$(_scratch_xfs_db -c 'version')"
> > +	local output=("FEATURES:")
> > +	local found=0
> > +
> > +	for feature in "$@"; do
> > +		local status="NO"
> > +		if echo "${features}" | grep -q -w "${feature}"; then
> > +			status="YES"
> > +			found=$((found + 1))
> > +		fi
> > +		output+=("${feature}:${status}")
> > +	done
> > +
> > +	echo "${output[@]}"
> > +	test "${found}" -eq "$#"
> > +}
> > diff --git a/tests/xfs/768 b/tests/xfs/768
> > new file mode 100755
> > index 00000000..dd9c53be
> > --- /dev/null
> > +++ b/tests/xfs/768
> > @@ -0,0 +1,80 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0-or-later
> > +# Copyright (c) 2021 Oracle.  All Rights Reserved.
> > +#
> > +# FS QA Test No. 768
> > +#
> > +# Make sure that the kernel won't mount a filesystem if repair forcibly sets
> > +# NEEDSREPAIR while fixing metadata.  Corrupt a directory in such a way as
> > +# to force repair to write an invalid dirent value as a sentinel to trigger a
> > +# repair activity in a later phase.  Use a debug knob in xfs_repair to abort
> > +# the repair immediately after forcing the flag on.
> > +
> > +seq=`basename $0`
> > +seqres=$RESULT_DIR/$seq
> > +echo "QA output created by $seq"
> > +
> > +here=`pwd`
> > +tmp=/tmp/$$
> > +status=1    # failure is the default!
> > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > +
> > +_cleanup()
> > +{
> > +	cd /
> > +	rm -f $tmp.*
> > +}
> > +
> > +# get standard environment, filters and checks
> > +. ./common/rc
> > +. ./common/filter
> > +
> > +# real QA test starts here
> > +_supported_fs xfs
> > +_require_scratch
> > +grep -q LIBXFS_DEBUG_WRITE_CRASH $XFS_REPAIR_PROG || \
> > +		_notrun "libxfs write failure injection hook not detected?"
> 
> Sorry that I didn't notice it earlier, but this pattern repeats in both
> tests, might be possible to turn it into a common helper?
> 
> And $XFS_REPAIR_PROG may contain some pre-defined options along with
> xfs_repair in the future, like $DF_PROG is actually defined as
> "df -T -P". Perhaps $(type -P xfs_repair) is better here.

<shrug> So far nobody's tried that and there are other tests that assume
that things like $XFS_SCRUB_PROG are strict file paths, but I don't mind
changing this to use `type`.  I'll refactor this into a helper function
while I'm at it.

> > +
> > +rm -f $seqres.full
> > +
> > +# Set up a real filesystem for our actual test
> > +_scratch_mkfs -m crc=1 >> $seqres.full
> 
> Test relies on -m crc=1, so probably we need _require_xfs_crc as well to
> make sure userspace and kernel supports crc.
> 
> Or just like xfs/770, use _require_scratch_xfs_crc instead of
> _require_scratch?

Ok, done.

> > +
> > +# Create a directory large enough to have a dir data block.  2k worth of
> > +# dirent names ought to do it.
> > +_scratch_mount
> > +mkdir -p $SCRATCH_MNT/fubar
> > +for i in $(seq 0 256 2048); do
> > +	fname=$(printf "%0255d" $i)
> > +	ln -s -f urk $SCRATCH_MNT/fubar/$fname
> > +done
> > +inum=$(stat -c '%i' $SCRATCH_MNT/fubar)
> > +_scratch_unmount
> > +
> > +# Fuzz the directory
> > +_scratch_xfs_db -x -c "inode $inum" -c "dblock 0" \
> > +	-c "fuzz -d bu[2].inumber add" >> $seqres.full
> > +
> > +# Try to repair the directory, force it to crash after setting needsrepair
> > +LIBXFS_DEBUG_WRITE_CRASH=ddev=2 _scratch_xfs_repair 2>> $seqres.full
> > +test $? -eq 137 || echo "repair should have been killed??"
> > +
> > +# We can't mount, right?
> > +_check_scratch_xfs_features NEEDSREPAIR
> > +_try_scratch_mount &> $tmp.mount
> > +res=$?
> > +_filter_scratch < $tmp.mount
> > +if [ $res -eq 0 ]; then
> > +	echo "Should not be able to mount after needsrepair crash"
> > +	_scratch_unmount
> > +fi
> > +
> > +# Repair properly this time and retry the mount
> > +_scratch_xfs_repair 2>> $seqres.full
> > +_check_scratch_xfs_features NEEDSREPAIR
> > +
> > +_scratch_mount
> > +
> > +# success, all done
> > +status=0
> > +exit
> > diff --git a/tests/xfs/768.out b/tests/xfs/768.out
> > new file mode 100644
> > index 00000000..1168ba25
> > --- /dev/null
> > +++ b/tests/xfs/768.out
> > @@ -0,0 +1,4 @@
> > +QA output created by 768
> > +FEATURES: NEEDSREPAIR:YES
> > +mount: SCRATCH_MNT: mount(2) system call failed: Structure needs cleaning.
> > +FEATURES: NEEDSREPAIR:NO
> > diff --git a/tests/xfs/770 b/tests/xfs/770
> > new file mode 100755
> > index 00000000..574047d5
> > --- /dev/null
> > +++ b/tests/xfs/770
> > @@ -0,0 +1,82 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0-or-later
> > +# Copyright (c) 2021 Oracle.  All Rights Reserved.
> > +#
> > +# FS QA Test No. 770
> > +#
> > +# Populate a filesystem with all types of metadata, then run repair with the
> > +# libxfs write failure trigger set to go after a single write.  Check that the
> > +# injected error trips, causing repair to abort, that needsrepair is set on the
> > +# fs, the kernel won't mount; and that a non-injecting repair run clears
> > +# needsrepair and makes the filesystem mountable again.
> > +#
> > +# Repeat with the trip point set to successively higher numbers of writes until
> > +# we hit ~200 writes or repair manages to run to completion without tripping.
> > +
> > +seq=`basename $0`
> > +seqres=$RESULT_DIR/$seq
> > +echo "QA output created by $seq"
> > +
> > +here=`pwd`
> > +tmp=/tmp/$$
> > +status=1    # failure is the default!
> > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > +
> > +_cleanup()
> > +{
> > +	cd /
> > +	rm -f $tmp.*
> > +}
> > +
> > +# get standard environment, filters and checks
> > +. ./common/rc
> > +. ./common/populate
> > +. ./common/filter
> > +
> > +# real QA test starts here
> > +_supported_fs xfs
> > +
> > +_require_scratch_xfs_crc		# needsrepair only exists for v5
> > +_require_populate_commands
> > +
> > +rm -f ${RESULT_DIR}/require_scratch	# we take care of checking the fs
> 
> Use _require_scratch_nocheck instead?

Fixed, thanks.

--D

> 
> Thanks,
> Eryu
> 
> > +rm -f $seqres.full
> > +
> > +# Populate the filesystem
> > +_scratch_populate_cached nofill >> $seqres.full 2>&1
> > +
> > +max_writes=200			# 200 loops should be enough for anyone
> > +nr_incr=$((13 / TIME_FACTOR))
> > +test $nr_incr -lt 1 && nr_incr=1
> > +for ((nr_writes = 1; nr_writes < max_writes; nr_writes += nr_incr)); do
> > +	# Start a repair and force it to abort after some number of writes
> > +	LIBXFS_DEBUG_WRITE_CRASH=ddev=$nr_writes _scratch_xfs_repair 2>> $seqres.full
> > +	res=$?
> > +	if [ $res -ne 0 ] && [ $res -ne 137 ]; then
> > +		echo "repair failed with $res??"
> > +		break
> > +	elif [ $res -eq 0 ]; then
> > +		[ $nr_writes -eq 1 ] && \
> > +			echo "ran to completion on the first try?"
> > +		break
> > +	fi
> > +
> > +	# Check the state of NEEDSREPAIR after repair fails.  If it isn't set
> > +	# but if repair -n says the fs is clean, then it's possible that the
> > +	# injected error caused it to abort immediately after the write that
> > +	# cleared NEEDSREPAIR.
> > +	if ! _check_scratch_xfs_features NEEDSREPAIR > /dev/null &&
> > +	   ! _scratch_xfs_repair -n &>> $seqres.full; then
> > +		echo "NEEDSREPAIR should be set on corrupt fs"
> > +	fi
> > +
> > +	# Repair properly this time and retry the mount
> > +	_scratch_xfs_repair 2>> $seqres.full
> > +	_check_scratch_xfs_features NEEDSREPAIR > /dev/null && \
> > +		echo "Repair failed to clear NEEDSREPAIR on the $nr_writes writes test"
> > +done
> > +
> > +# success, all done
> > +echo Silence is golden.
> > +status=0
> > +exit
> > diff --git a/tests/xfs/770.out b/tests/xfs/770.out
> > new file mode 100644
> > index 00000000..725d740b
> > --- /dev/null
> > +++ b/tests/xfs/770.out
> > @@ -0,0 +1,2 @@
> > +QA output created by 770
> > +Silence is golden.
> > diff --git a/tests/xfs/group b/tests/xfs/group
> > index fe83f82d..09fddb5a 100644
> > --- a/tests/xfs/group
> > +++ b/tests/xfs/group
> > @@ -520,3 +520,5 @@
> >  537 auto quick
> >  538 auto stress
> >  539 auto quick mount
> > +768 auto quick repair
> > +770 auto repair

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/1] xfs: test that the needsrepair feature works as advertised
  2021-04-21 20:37     ` Darrick J. Wong
@ 2021-04-22 11:25       ` Brian Foster
  0 siblings, 0 replies; 10+ messages in thread
From: Brian Foster @ 2021-04-22 11:25 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: guaneryu, linux-xfs, fstests, guan

On Wed, Apr 21, 2021 at 01:37:46PM -0700, Darrick J. Wong wrote:
> On Wed, Apr 21, 2021 at 01:29:36PM -0400, Brian Foster wrote:
> > On Tue, Apr 20, 2021 at 05:22:41PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <djwong@kernel.org>
> > > 
> > > Make sure that the needsrepair feature flag can be cleared only by
> > > repair and that mounts are prohibited when the feature is set.
> > > 
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > >  common/xfs        |   28 ++++++++++++++++++
> > >  tests/xfs/768     |   80 +++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  tests/xfs/768.out |    4 +++
> > >  tests/xfs/770     |   83 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  tests/xfs/770.out |    2 +
> > >  tests/xfs/group   |    2 +
> > >  6 files changed, 199 insertions(+)
> > >  create mode 100755 tests/xfs/768
> > >  create mode 100644 tests/xfs/768.out
> > >  create mode 100755 tests/xfs/770
> > >  create mode 100644 tests/xfs/770.out
> > > 
> > > 
...
> > > diff --git a/tests/xfs/770 b/tests/xfs/770
> > > new file mode 100755
> > > index 00000000..40e67ab5
> > > --- /dev/null
> > > +++ b/tests/xfs/770
> > > @@ -0,0 +1,83 @@
> > > +#! /bin/bash
> > > +# SPDX-License-Identifier: GPL-2.0-or-later
> > > +# Copyright (c) 2021 Oracle.  All Rights Reserved.
> > > +#
> > > +# FS QA Test No. 770
> > > +#
> > > +# Populate a filesystem with all types of metadata, then run repair with the
> > > +# libxfs write failure trigger set to go after a single write.  Check that the
> > > +# injected error trips, causing repair to abort, that needsrepair is set on the
> > > +# fs, the kernel won't mount; and that a non-injecting repair run clears
> > > +# needsrepair and makes the filesystem mountable again.
> > > +#
> > > +# Repeat with the trip point set to successively higher numbers of writes until
> > > +# we hit ~200 writes or repair manages to run to completion without tripping.
> > > +
> > > +seq=`basename $0`
> > > +seqres=$RESULT_DIR/$seq
> > > +echo "QA output created by $seq"
> > > +
> > > +here=`pwd`
> > > +tmp=/tmp/$$
> > > +status=1    # failure is the default!
> > > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > > +
> > > +_cleanup()
> > > +{
> > > +	cd /
> > > +	rm -f $tmp.*
> > > +}
> > > +
> > > +# get standard environment, filters and checks
> > > +. ./common/rc
> > > +. ./common/populate
> > > +. ./common/filter
> > > +
> > > +# real QA test starts here
> > > +_supported_fs xfs
> > > +_require_scratch_nocheck
> > > +_require_scratch_xfs_crc		# needsrepair only exists for v5
> > > +_require_populate_commands
> > > +_require_libxfs_debug_flag LIBXFS_DEBUG_WRITE_CRASH
> > > +
> > > +rm -f $seqres.full
> > > +
> > > +# Populate the filesystem
> > > +_scratch_populate_cached nofill >> $seqres.full 2>&1
> > > +
> > > +max_writes=200			# 200 loops should be enough for anyone
> > > +nr_incr=$((13 / TIME_FACTOR))
> > 
> > Could we randomize this increment so we get varying behavior run to run?
> > It might be nice to actually do that on a per-iteration basis as well
> > rather than once at the start of the test.
> 
> Ok, I'll add a little bit of randomness to each run.
> 
> How about:
> 
> 	local crash_after=$(( nr_writes + ((RANDOM % 7) - 3) ))
> 	LIBXFS_DEBUG_WRITE_CRASH=ddev=$crash_after _scratch_xfs_repair
> 
> ?
> 

Seems reasonable to me.

> > > +test $nr_incr -lt 1 && nr_incr=1
> > > +for ((nr_writes = 1; nr_writes < max_writes; nr_writes += nr_incr)); do
> > > +	# Start a repair and force it to abort after some number of writes
> > > +	LIBXFS_DEBUG_WRITE_CRASH=ddev=$nr_writes \
> > > +			_scratch_xfs_repair 2>> $seqres.full
> > > +	res=$?
> > > +	if [ $res -ne 0 ] && [ $res -ne 137 ]; then
> > > +		echo "repair failed with $res??"
> > > +		break
> > > +	elif [ $res -eq 0 ]; then
> > > +		[ $nr_writes -eq 1 ] && \
> > > +			echo "ran to completion on the first try?"
> > > +		break
> > > +	fi
> > > +
> > > +	# Check the state of NEEDSREPAIR after repair fails.  If it isn't set
> > > +	# but if repair -n says the fs is clean, then it's possible that the
> > > +	# injected error caused it to abort immediately after the write that
> > > +	# cleared NEEDSREPAIR.
> > > +	if ! _check_scratch_xfs_features NEEDSREPAIR > /dev/null &&
> > > +	   ! _scratch_xfs_repair -n &>> $seqres.full; then
> > > +		echo "NEEDSREPAIR should be set on corrupt fs"
> > > +	fi
> > > +
> > > +	# Repair properly this time and retry the mount
> > 
> > We can probably drop the "retry the mount" bit since we no longer do
> > that.
> 
> Oops, yes, good catch.
> 
> > > +	_scratch_xfs_repair 2>> $seqres.full
> > > +	_check_scratch_xfs_features NEEDSREPAIR > /dev/null && \
> > > +		echo "Repair failed to clear NEEDSREPAIR on the $nr_writes writes test"
> > 
> > Maybe I'm mistaken, but I thought we were going to let repair run and
> > fail repeatedly/incrementally and then leave the full repair for the
> > end..?
> 
> Ah.  Last time you wrote:
> 
> "It probably makes sense to test that NEEDSREPAIR remains set throughout
> the test sequence until repair completes cleanly..."
> 
> and I interpreted "throughout the test sequence" to mean "until the
> end of the loop body", not the whole test.
> 

Looking back, I think I was referring to the mount cycles in that
particular comment and had the repeated failure test in mind when I
wrote:

"Would we expect much difference in behavior if we populated once at the
start of the test and then just bumped up the write count until we get
to the max or the repair completes?"

... but that was probably not clear. Anyways..

> But I guess it /would/ be more interesting to study the effects of
> multiple successive write failures. ;)
> 

Yeah, assuming it doesn't outright explode from the onset, I think it's
beneficial to take full advantage of the failure mechanism and make the
test as mean as possible. ;)

Brian

> --D
> 
> > 
> > Brian
> > 
> > > +done
> > > +
> > > +# success, all done
> > > +echo Silence is golden.
> > > +status=0
> > > +exit
> > > diff --git a/tests/xfs/770.out b/tests/xfs/770.out
> > > new file mode 100644
> > > index 00000000..725d740b
> > > --- /dev/null
> > > +++ b/tests/xfs/770.out
> > > @@ -0,0 +1,2 @@
> > > +QA output created by 770
> > > +Silence is golden.
> > > diff --git a/tests/xfs/group b/tests/xfs/group
> > > index d1b1456b..461ae2b2 100644
> > > --- a/tests/xfs/group
> > > +++ b/tests/xfs/group
> > > @@ -522,3 +522,5 @@
> > >  537 auto quick
> > >  538 auto stress
> > >  539 auto quick mount
> > > +768 auto quick repair
> > > +770 auto repair
> > > 
> > 
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/1] xfs: test that the needsrepair feature works as advertised
  2021-04-21 17:29   ` Brian Foster
@ 2021-04-21 20:37     ` Darrick J. Wong
  2021-04-22 11:25       ` Brian Foster
  0 siblings, 1 reply; 10+ messages in thread
From: Darrick J. Wong @ 2021-04-21 20:37 UTC (permalink / raw)
  To: Brian Foster; +Cc: guaneryu, linux-xfs, fstests, guan

On Wed, Apr 21, 2021 at 01:29:36PM -0400, Brian Foster wrote:
> On Tue, Apr 20, 2021 at 05:22:41PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Make sure that the needsrepair feature flag can be cleared only by
> > repair and that mounts are prohibited when the feature is set.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  common/xfs        |   28 ++++++++++++++++++
> >  tests/xfs/768     |   80 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  tests/xfs/768.out |    4 +++
> >  tests/xfs/770     |   83 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  tests/xfs/770.out |    2 +
> >  tests/xfs/group   |    2 +
> >  6 files changed, 199 insertions(+)
> >  create mode 100755 tests/xfs/768
> >  create mode 100644 tests/xfs/768.out
> >  create mode 100755 tests/xfs/770
> >  create mode 100644 tests/xfs/770.out
> > 
> > 
> > diff --git a/common/xfs b/common/xfs
> > index 887bd001..c2384146 100644
> > --- a/common/xfs
> > +++ b/common/xfs
> > @@ -312,6 +312,13 @@ _scratch_xfs_check()
> >  	_xfs_check $SCRATCH_OPTIONS $* $SCRATCH_DEV
> >  }
> >  
> > +_require_libxfs_debug_flag() {
> > +	local hook="$1"
> > +
> > +	grep -q LIBXFS_DEBUG_WRITE_CRASH "$(type -P xfs_repair)" || \
> 
> Did you mean to use $hook here?

Yes.

> > +		_notrun "libxfs debug hook $hook not detected?"
> > +}
> > +
> >  _scratch_xfs_repair()
> >  {
> >  	SCRATCH_OPTIONS=""
> > @@ -1114,3 +1121,24 @@ _xfs_get_cowgc_interval() {
> >  		_fail "Can't find cowgc interval procfs knob?"
> >  	fi
> >  }
> > +
> > +# Print the status of the given features on the scratch filesystem.
> > +# Returns 0 if all features are found, 1 otherwise.
> > +_check_scratch_xfs_features()
> > +{
> > +	local features="$(_scratch_xfs_db -c 'version')"
> > +	local output=("FEATURES:")
> > +	local found=0
> > +
> > +	for feature in "$@"; do
> > +		local status="NO"
> > +		if echo "${features}" | grep -q -w "${feature}"; then
> > +			status="YES"
> > +			found=$((found + 1))
> > +		fi
> > +		output+=("${feature}:${status}")
> > +	done
> > +
> > +	echo "${output[@]}"
> > +	test "${found}" -eq "$#"
> > +}
> > diff --git a/tests/xfs/768 b/tests/xfs/768
> > new file mode 100755
> > index 00000000..e6301829
> > --- /dev/null
> > +++ b/tests/xfs/768
> > @@ -0,0 +1,80 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0-or-later
> > +# Copyright (c) 2021 Oracle.  All Rights Reserved.
> > +#
> > +# FS QA Test No. 768
> > +#
> > +# Make sure that the kernel won't mount a filesystem if repair forcibly sets
> > +# NEEDSREPAIR while fixing metadata.  Corrupt a directory in such a way as
> > +# to force repair to write an invalid dirent value as a sentinel to trigger a
> > +# repair activity in a later phase.  Use a debug knob in xfs_repair to abort
> > +# the repair immediately after forcing the flag on.
> > +
> > +seq=`basename $0`
> > +seqres=$RESULT_DIR/$seq
> > +echo "QA output created by $seq"
> > +
> > +here=`pwd`
> > +tmp=/tmp/$$
> > +status=1    # failure is the default!
> > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > +
> > +_cleanup()
> > +{
> > +	cd /
> > +	rm -f $tmp.*
> > +}
> > +
> > +# get standard environment, filters and checks
> > +. ./common/rc
> > +. ./common/filter
> > +
> > +# real QA test starts here
> > +_supported_fs xfs
> > +_require_scratch_nocheck
> > +_require_scratch_xfs_crc		# needsrepair only exists for v5
> > +_require_libxfs_debug_flag LIBXFS_DEBUG_WRITE_CRASH
> > +
> > +rm -f $seqres.full
> > +
> > +# Set up a real filesystem for our actual test
> > +_scratch_mkfs -m crc=1 >> $seqres.full
> 
> I don't think there's a need to explicitly format with -mcrc=1 when the
> require above would filter out the test anyways (which I think is fine
> since v5 has been default for some time now). Otherwise this test LGTM.

<nod> Fixed.

> > +
> > +# Create a directory large enough to have a dir data block.  2k worth of
> > +# dirent names ought to do it.
> > +_scratch_mount
> > +mkdir -p $SCRATCH_MNT/fubar
> > +for i in $(seq 0 256 2048); do
> > +	fname=$(printf "%0255d" $i)
> > +	ln -s -f urk $SCRATCH_MNT/fubar/$fname
> > +done
> > +inum=$(stat -c '%i' $SCRATCH_MNT/fubar)
> > +_scratch_unmount
> > +
> > +# Fuzz the directory
> > +_scratch_xfs_db -x -c "inode $inum" -c "dblock 0" \
> > +	-c "fuzz -d bu[2].inumber add" >> $seqres.full
> > +
> > +# Try to repair the directory, force it to crash after setting needsrepair
> > +LIBXFS_DEBUG_WRITE_CRASH=ddev=2 _scratch_xfs_repair 2>> $seqres.full
> > +test $? -eq 137 || echo "repair should have been killed??"
> > +
> > +# We can't mount, right?
> > +_check_scratch_xfs_features NEEDSREPAIR
> > +_try_scratch_mount &> $tmp.mount
> > +res=$?
> > +_filter_scratch < $tmp.mount
> > +if [ $res -eq 0 ]; then
> > +	echo "Should not be able to mount after needsrepair crash"
> > +	_scratch_unmount
> > +fi
> > +
> > +# Repair properly this time and retry the mount
> > +_scratch_xfs_repair 2>> $seqres.full
> > +_check_scratch_xfs_features NEEDSREPAIR
> > +
> > +_scratch_mount
> > +
> > +# success, all done
> > +status=0
> > +exit
> ...
> > diff --git a/tests/xfs/770 b/tests/xfs/770
> > new file mode 100755
> > index 00000000..40e67ab5
> > --- /dev/null
> > +++ b/tests/xfs/770
> > @@ -0,0 +1,83 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0-or-later
> > +# Copyright (c) 2021 Oracle.  All Rights Reserved.
> > +#
> > +# FS QA Test No. 770
> > +#
> > +# Populate a filesystem with all types of metadata, then run repair with the
> > +# libxfs write failure trigger set to go after a single write.  Check that the
> > +# injected error trips, causing repair to abort, that needsrepair is set on the
> > +# fs, the kernel won't mount; and that a non-injecting repair run clears
> > +# needsrepair and makes the filesystem mountable again.
> > +#
> > +# Repeat with the trip point set to successively higher numbers of writes until
> > +# we hit ~200 writes or repair manages to run to completion without tripping.
> > +
> > +seq=`basename $0`
> > +seqres=$RESULT_DIR/$seq
> > +echo "QA output created by $seq"
> > +
> > +here=`pwd`
> > +tmp=/tmp/$$
> > +status=1    # failure is the default!
> > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > +
> > +_cleanup()
> > +{
> > +	cd /
> > +	rm -f $tmp.*
> > +}
> > +
> > +# get standard environment, filters and checks
> > +. ./common/rc
> > +. ./common/populate
> > +. ./common/filter
> > +
> > +# real QA test starts here
> > +_supported_fs xfs
> > +_require_scratch_nocheck
> > +_require_scratch_xfs_crc		# needsrepair only exists for v5
> > +_require_populate_commands
> > +_require_libxfs_debug_flag LIBXFS_DEBUG_WRITE_CRASH
> > +
> > +rm -f $seqres.full
> > +
> > +# Populate the filesystem
> > +_scratch_populate_cached nofill >> $seqres.full 2>&1
> > +
> > +max_writes=200			# 200 loops should be enough for anyone
> > +nr_incr=$((13 / TIME_FACTOR))
> 
> Could we randomize this increment so we get varying behavior run to run?
> It might be nice to actually do that on a per-iteration basis as well
> rather than once at the start of the test.

Ok, I'll add a little bit of randomness to each run.

How about:

	local crash_after=$(( nr_writes + ((RANDOM % 7) - 3) ))
	LIBXFS_DEBUG_WRITE_CRASH=ddev=$crash_after _scratch_xfs_repair

?

> > +test $nr_incr -lt 1 && nr_incr=1
> > +for ((nr_writes = 1; nr_writes < max_writes; nr_writes += nr_incr)); do
> > +	# Start a repair and force it to abort after some number of writes
> > +	LIBXFS_DEBUG_WRITE_CRASH=ddev=$nr_writes \
> > +			_scratch_xfs_repair 2>> $seqres.full
> > +	res=$?
> > +	if [ $res -ne 0 ] && [ $res -ne 137 ]; then
> > +		echo "repair failed with $res??"
> > +		break
> > +	elif [ $res -eq 0 ]; then
> > +		[ $nr_writes -eq 1 ] && \
> > +			echo "ran to completion on the first try?"
> > +		break
> > +	fi
> > +
> > +	# Check the state of NEEDSREPAIR after repair fails.  If it isn't set
> > +	# but if repair -n says the fs is clean, then it's possible that the
> > +	# injected error caused it to abort immediately after the write that
> > +	# cleared NEEDSREPAIR.
> > +	if ! _check_scratch_xfs_features NEEDSREPAIR > /dev/null &&
> > +	   ! _scratch_xfs_repair -n &>> $seqres.full; then
> > +		echo "NEEDSREPAIR should be set on corrupt fs"
> > +	fi
> > +
> > +	# Repair properly this time and retry the mount
> 
> We can probably drop the "retry the mount" bit since we no longer do
> that.

Oops, yes, good catch.

> > +	_scratch_xfs_repair 2>> $seqres.full
> > +	_check_scratch_xfs_features NEEDSREPAIR > /dev/null && \
> > +		echo "Repair failed to clear NEEDSREPAIR on the $nr_writes writes test"
> 
> Maybe I'm mistaken, but I thought we were going to let repair run and
> fail repeatedly/incrementally and then leave the full repair for the
> end..?

Ah.  Last time you wrote:

"It probably makes sense to test that NEEDSREPAIR remains set throughout
the test sequence until repair completes cleanly..."

and I interpreted "throughout the test sequence" to mean "until the
end of the loop body", not the whole test.

But I guess it /would/ be more interesting to study the effects of
multiple successive write failures. ;)

--D

> 
> Brian
> 
> > +done
> > +
> > +# success, all done
> > +echo Silence is golden.
> > +status=0
> > +exit
> > diff --git a/tests/xfs/770.out b/tests/xfs/770.out
> > new file mode 100644
> > index 00000000..725d740b
> > --- /dev/null
> > +++ b/tests/xfs/770.out
> > @@ -0,0 +1,2 @@
> > +QA output created by 770
> > +Silence is golden.
> > diff --git a/tests/xfs/group b/tests/xfs/group
> > index d1b1456b..461ae2b2 100644
> > --- a/tests/xfs/group
> > +++ b/tests/xfs/group
> > @@ -522,3 +522,5 @@
> >  537 auto quick
> >  538 auto stress
> >  539 auto quick mount
> > +768 auto quick repair
> > +770 auto repair
> > 
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/1] xfs: test that the needsrepair feature works as advertised
  2021-04-21  0:22 ` [PATCH 1/1] xfs: test that the needsrepair feature works as advertised Darrick J. Wong
  2021-04-21  6:01   ` Amir Goldstein
@ 2021-04-21 17:29   ` Brian Foster
  2021-04-21 20:37     ` Darrick J. Wong
  1 sibling, 1 reply; 10+ messages in thread
From: Brian Foster @ 2021-04-21 17:29 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: guaneryu, linux-xfs, fstests, guan

On Tue, Apr 20, 2021 at 05:22:41PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Make sure that the needsrepair feature flag can be cleared only by
> repair and that mounts are prohibited when the feature is set.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  common/xfs        |   28 ++++++++++++++++++
>  tests/xfs/768     |   80 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  tests/xfs/768.out |    4 +++
>  tests/xfs/770     |   83 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  tests/xfs/770.out |    2 +
>  tests/xfs/group   |    2 +
>  6 files changed, 199 insertions(+)
>  create mode 100755 tests/xfs/768
>  create mode 100644 tests/xfs/768.out
>  create mode 100755 tests/xfs/770
>  create mode 100644 tests/xfs/770.out
> 
> 
> diff --git a/common/xfs b/common/xfs
> index 887bd001..c2384146 100644
> --- a/common/xfs
> +++ b/common/xfs
> @@ -312,6 +312,13 @@ _scratch_xfs_check()
>  	_xfs_check $SCRATCH_OPTIONS $* $SCRATCH_DEV
>  }
>  
> +_require_libxfs_debug_flag() {
> +	local hook="$1"
> +
> +	grep -q LIBXFS_DEBUG_WRITE_CRASH "$(type -P xfs_repair)" || \

Did you mean to use $hook here?

> +		_notrun "libxfs debug hook $hook not detected?"
> +}
> +
>  _scratch_xfs_repair()
>  {
>  	SCRATCH_OPTIONS=""
> @@ -1114,3 +1121,24 @@ _xfs_get_cowgc_interval() {
>  		_fail "Can't find cowgc interval procfs knob?"
>  	fi
>  }
> +
> +# Print the status of the given features on the scratch filesystem.
> +# Returns 0 if all features are found, 1 otherwise.
> +_check_scratch_xfs_features()
> +{
> +	local features="$(_scratch_xfs_db -c 'version')"
> +	local output=("FEATURES:")
> +	local found=0
> +
> +	for feature in "$@"; do
> +		local status="NO"
> +		if echo "${features}" | grep -q -w "${feature}"; then
> +			status="YES"
> +			found=$((found + 1))
> +		fi
> +		output+=("${feature}:${status}")
> +	done
> +
> +	echo "${output[@]}"
> +	test "${found}" -eq "$#"
> +}
> diff --git a/tests/xfs/768 b/tests/xfs/768
> new file mode 100755
> index 00000000..e6301829
> --- /dev/null
> +++ b/tests/xfs/768
> @@ -0,0 +1,80 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +# Copyright (c) 2021 Oracle.  All Rights Reserved.
> +#
> +# FS QA Test No. 768
> +#
> +# Make sure that the kernel won't mount a filesystem if repair forcibly sets
> +# NEEDSREPAIR while fixing metadata.  Corrupt a directory in such a way as
> +# to force repair to write an invalid dirent value as a sentinel to trigger a
> +# repair activity in a later phase.  Use a debug knob in xfs_repair to abort
> +# the repair immediately after forcing the flag on.
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1    # failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	rm -f $tmp.*
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +
> +# real QA test starts here
> +_supported_fs xfs
> +_require_scratch_nocheck
> +_require_scratch_xfs_crc		# needsrepair only exists for v5
> +_require_libxfs_debug_flag LIBXFS_DEBUG_WRITE_CRASH
> +
> +rm -f $seqres.full
> +
> +# Set up a real filesystem for our actual test
> +_scratch_mkfs -m crc=1 >> $seqres.full

I don't think there's a need to explicitly format with -mcrc=1 when the
require above would filter out the test anyways (which I think is fine
since v5 has been default for some time now). Otherwise this test LGTM.

> +
> +# Create a directory large enough to have a dir data block.  2k worth of
> +# dirent names ought to do it.
> +_scratch_mount
> +mkdir -p $SCRATCH_MNT/fubar
> +for i in $(seq 0 256 2048); do
> +	fname=$(printf "%0255d" $i)
> +	ln -s -f urk $SCRATCH_MNT/fubar/$fname
> +done
> +inum=$(stat -c '%i' $SCRATCH_MNT/fubar)
> +_scratch_unmount
> +
> +# Fuzz the directory
> +_scratch_xfs_db -x -c "inode $inum" -c "dblock 0" \
> +	-c "fuzz -d bu[2].inumber add" >> $seqres.full
> +
> +# Try to repair the directory, force it to crash after setting needsrepair
> +LIBXFS_DEBUG_WRITE_CRASH=ddev=2 _scratch_xfs_repair 2>> $seqres.full
> +test $? -eq 137 || echo "repair should have been killed??"
> +
> +# We can't mount, right?
> +_check_scratch_xfs_features NEEDSREPAIR
> +_try_scratch_mount &> $tmp.mount
> +res=$?
> +_filter_scratch < $tmp.mount
> +if [ $res -eq 0 ]; then
> +	echo "Should not be able to mount after needsrepair crash"
> +	_scratch_unmount
> +fi
> +
> +# Repair properly this time and retry the mount
> +_scratch_xfs_repair 2>> $seqres.full
> +_check_scratch_xfs_features NEEDSREPAIR
> +
> +_scratch_mount
> +
> +# success, all done
> +status=0
> +exit
...
> diff --git a/tests/xfs/770 b/tests/xfs/770
> new file mode 100755
> index 00000000..40e67ab5
> --- /dev/null
> +++ b/tests/xfs/770
> @@ -0,0 +1,83 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +# Copyright (c) 2021 Oracle.  All Rights Reserved.
> +#
> +# FS QA Test No. 770
> +#
> +# Populate a filesystem with all types of metadata, then run repair with the
> +# libxfs write failure trigger set to go after a single write.  Check that the
> +# injected error trips, causing repair to abort, that needsrepair is set on the
> +# fs, the kernel won't mount; and that a non-injecting repair run clears
> +# needsrepair and makes the filesystem mountable again.
> +#
> +# Repeat with the trip point set to successively higher numbers of writes until
> +# we hit ~200 writes or repair manages to run to completion without tripping.
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1    # failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	rm -f $tmp.*
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/populate
> +. ./common/filter
> +
> +# real QA test starts here
> +_supported_fs xfs
> +_require_scratch_nocheck
> +_require_scratch_xfs_crc		# needsrepair only exists for v5
> +_require_populate_commands
> +_require_libxfs_debug_flag LIBXFS_DEBUG_WRITE_CRASH
> +
> +rm -f $seqres.full
> +
> +# Populate the filesystem
> +_scratch_populate_cached nofill >> $seqres.full 2>&1
> +
> +max_writes=200			# 200 loops should be enough for anyone
> +nr_incr=$((13 / TIME_FACTOR))

Could we randomize this increment so we get varying behavior run to run?
It might be nice to actually do that on a per-iteration basis as well
rather than once at the start of the test.

> +test $nr_incr -lt 1 && nr_incr=1
> +for ((nr_writes = 1; nr_writes < max_writes; nr_writes += nr_incr)); do
> +	# Start a repair and force it to abort after some number of writes
> +	LIBXFS_DEBUG_WRITE_CRASH=ddev=$nr_writes \
> +			_scratch_xfs_repair 2>> $seqres.full
> +	res=$?
> +	if [ $res -ne 0 ] && [ $res -ne 137 ]; then
> +		echo "repair failed with $res??"
> +		break
> +	elif [ $res -eq 0 ]; then
> +		[ $nr_writes -eq 1 ] && \
> +			echo "ran to completion on the first try?"
> +		break
> +	fi
> +
> +	# Check the state of NEEDSREPAIR after repair fails.  If it isn't set
> +	# but if repair -n says the fs is clean, then it's possible that the
> +	# injected error caused it to abort immediately after the write that
> +	# cleared NEEDSREPAIR.
> +	if ! _check_scratch_xfs_features NEEDSREPAIR > /dev/null &&
> +	   ! _scratch_xfs_repair -n &>> $seqres.full; then
> +		echo "NEEDSREPAIR should be set on corrupt fs"
> +	fi
> +
> +	# Repair properly this time and retry the mount

We can probably drop the "retry the mount" bit since we no longer do
that.

> +	_scratch_xfs_repair 2>> $seqres.full
> +	_check_scratch_xfs_features NEEDSREPAIR > /dev/null && \
> +		echo "Repair failed to clear NEEDSREPAIR on the $nr_writes writes test"

Maybe I'm mistaken, but I thought we were going to let repair run and
fail repeatedly/incrementally and then leave the full repair for the
end..?

Brian

> +done
> +
> +# success, all done
> +echo Silence is golden.
> +status=0
> +exit
> diff --git a/tests/xfs/770.out b/tests/xfs/770.out
> new file mode 100644
> index 00000000..725d740b
> --- /dev/null
> +++ b/tests/xfs/770.out
> @@ -0,0 +1,2 @@
> +QA output created by 770
> +Silence is golden.
> diff --git a/tests/xfs/group b/tests/xfs/group
> index d1b1456b..461ae2b2 100644
> --- a/tests/xfs/group
> +++ b/tests/xfs/group
> @@ -522,3 +522,5 @@
>  537 auto quick
>  538 auto stress
>  539 auto quick mount
> +768 auto quick repair
> +770 auto repair
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/1] xfs: test that the needsrepair feature works as advertised
  2021-04-21  6:01   ` Amir Goldstein
@ 2021-04-21 15:58     ` Darrick J. Wong
  0 siblings, 0 replies; 10+ messages in thread
From: Darrick J. Wong @ 2021-04-21 15:58 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Eryu Guan, linux-xfs, fstests, Eryu Guan

On Wed, Apr 21, 2021 at 09:01:05AM +0300, Amir Goldstein wrote:
> On Wed, Apr 21, 2021 at 3:23 AM Darrick J. Wong <djwong@kernel.org> wrote:
> >
> > From: Darrick J. Wong <djwong@kernel.org>
> >
> > Make sure that the needsrepair feature flag can be cleared only by
> > repair and that mounts are prohibited when the feature is set.
> >
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  common/xfs        |   28 ++++++++++++++++++
> >  tests/xfs/768     |   80 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  tests/xfs/768.out |    4 +++
> >  tests/xfs/770     |   83 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  tests/xfs/770.out |    2 +
> >  tests/xfs/group   |    2 +
> >  6 files changed, 199 insertions(+)
> >  create mode 100755 tests/xfs/768
> >  create mode 100644 tests/xfs/768.out
> >  create mode 100755 tests/xfs/770
> >  create mode 100644 tests/xfs/770.out
> >
> >
> > diff --git a/common/xfs b/common/xfs
> > index 887bd001..c2384146 100644
> > --- a/common/xfs
> > +++ b/common/xfs
> > @@ -312,6 +312,13 @@ _scratch_xfs_check()
> >         _xfs_check $SCRATCH_OPTIONS $* $SCRATCH_DEV
> >  }
> >
> > +_require_libxfs_debug_flag() {
> > +       local hook="$1"
> > +
> > +       grep -q LIBXFS_DEBUG_WRITE_CRASH "$(type -P xfs_repair)" || \
> > +               _notrun "libxfs debug hook $hook not detected?"
> 
> You ignored the $hook arg.
> And this is a bit of a strange test.

Doh.  Will fix; thanks for noticing that. :)

> In _require_unionmount_testsuite() I also pass env vars to the test utility
> and I made it so the usage message will print the non empty env vars
> passed to the programm.
> 
> I can understand if nothing like that was done for xfs_repair and you want
> this test to work with an already released version of xfs_repair, but if that
> test is against a pre-released version of xfs_repair, I suggest to make it
> more friendly for _require check.

That would have been a good idea, but we already shipped this in 5.11.
:/

--D

> Thanks,
> Amir.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/1] xfs: test that the needsrepair feature works as advertised
  2021-04-21  0:22 ` [PATCH 1/1] xfs: test that the needsrepair feature works as advertised Darrick J. Wong
@ 2021-04-21  6:01   ` Amir Goldstein
  2021-04-21 15:58     ` Darrick J. Wong
  2021-04-21 17:29   ` Brian Foster
  1 sibling, 1 reply; 10+ messages in thread
From: Amir Goldstein @ 2021-04-21  6:01 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Eryu Guan, linux-xfs, fstests, Eryu Guan

On Wed, Apr 21, 2021 at 3:23 AM Darrick J. Wong <djwong@kernel.org> wrote:
>
> From: Darrick J. Wong <djwong@kernel.org>
>
> Make sure that the needsrepair feature flag can be cleared only by
> repair and that mounts are prohibited when the feature is set.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  common/xfs        |   28 ++++++++++++++++++
>  tests/xfs/768     |   80 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  tests/xfs/768.out |    4 +++
>  tests/xfs/770     |   83 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  tests/xfs/770.out |    2 +
>  tests/xfs/group   |    2 +
>  6 files changed, 199 insertions(+)
>  create mode 100755 tests/xfs/768
>  create mode 100644 tests/xfs/768.out
>  create mode 100755 tests/xfs/770
>  create mode 100644 tests/xfs/770.out
>
>
> diff --git a/common/xfs b/common/xfs
> index 887bd001..c2384146 100644
> --- a/common/xfs
> +++ b/common/xfs
> @@ -312,6 +312,13 @@ _scratch_xfs_check()
>         _xfs_check $SCRATCH_OPTIONS $* $SCRATCH_DEV
>  }
>
> +_require_libxfs_debug_flag() {
> +       local hook="$1"
> +
> +       grep -q LIBXFS_DEBUG_WRITE_CRASH "$(type -P xfs_repair)" || \
> +               _notrun "libxfs debug hook $hook not detected?"

You ignored the $hook arg.
And this is a bit of a strange test.
In _require_unionmount_testsuite() I also pass env vars to the test utility
and I made it so the usage message will print the non empty env vars
passed to the programm.

I can understand if nothing like that was done for xfs_repair and you want
this test to work with an already released version of xfs_repair, but if that
test is against a pre-released version of xfs_repair, I suggest to make it
more friendly for _require check.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/1] xfs: test that the needsrepair feature works as advertised
  2021-04-21  0:22 [PATCHSET v5 0/1] fstests: make sure NEEDSREPAIR feature stops mounts Darrick J. Wong
@ 2021-04-21  0:22 ` Darrick J. Wong
  2021-04-21  6:01   ` Amir Goldstein
  2021-04-21 17:29   ` Brian Foster
  0 siblings, 2 replies; 10+ messages in thread
From: Darrick J. Wong @ 2021-04-21  0:22 UTC (permalink / raw)
  To: djwong, guaneryu; +Cc: linux-xfs, fstests, guan

From: Darrick J. Wong <djwong@kernel.org>

Make sure that the needsrepair feature flag can be cleared only by
repair and that mounts are prohibited when the feature is set.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 common/xfs        |   28 ++++++++++++++++++
 tests/xfs/768     |   80 +++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/xfs/768.out |    4 +++
 tests/xfs/770     |   83 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/xfs/770.out |    2 +
 tests/xfs/group   |    2 +
 6 files changed, 199 insertions(+)
 create mode 100755 tests/xfs/768
 create mode 100644 tests/xfs/768.out
 create mode 100755 tests/xfs/770
 create mode 100644 tests/xfs/770.out


diff --git a/common/xfs b/common/xfs
index 887bd001..c2384146 100644
--- a/common/xfs
+++ b/common/xfs
@@ -312,6 +312,13 @@ _scratch_xfs_check()
 	_xfs_check $SCRATCH_OPTIONS $* $SCRATCH_DEV
 }
 
+_require_libxfs_debug_flag() {
+	local hook="$1"
+
+	grep -q LIBXFS_DEBUG_WRITE_CRASH "$(type -P xfs_repair)" || \
+		_notrun "libxfs debug hook $hook not detected?"
+}
+
 _scratch_xfs_repair()
 {
 	SCRATCH_OPTIONS=""
@@ -1114,3 +1121,24 @@ _xfs_get_cowgc_interval() {
 		_fail "Can't find cowgc interval procfs knob?"
 	fi
 }
+
+# Print the status of the given features on the scratch filesystem.
+# Returns 0 if all features are found, 1 otherwise.
+_check_scratch_xfs_features()
+{
+	local features="$(_scratch_xfs_db -c 'version')"
+	local output=("FEATURES:")
+	local found=0
+
+	for feature in "$@"; do
+		local status="NO"
+		if echo "${features}" | grep -q -w "${feature}"; then
+			status="YES"
+			found=$((found + 1))
+		fi
+		output+=("${feature}:${status}")
+	done
+
+	echo "${output[@]}"
+	test "${found}" -eq "$#"
+}
diff --git a/tests/xfs/768 b/tests/xfs/768
new file mode 100755
index 00000000..e6301829
--- /dev/null
+++ b/tests/xfs/768
@@ -0,0 +1,80 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2021 Oracle.  All Rights Reserved.
+#
+# FS QA Test No. 768
+#
+# Make sure that the kernel won't mount a filesystem if repair forcibly sets
+# NEEDSREPAIR while fixing metadata.  Corrupt a directory in such a way as
+# to force repair to write an invalid dirent value as a sentinel to trigger a
+# repair activity in a later phase.  Use a debug knob in xfs_repair to abort
+# the repair immediately after forcing the flag on.
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1    # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+	cd /
+	rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs xfs
+_require_scratch_nocheck
+_require_scratch_xfs_crc		# needsrepair only exists for v5
+_require_libxfs_debug_flag LIBXFS_DEBUG_WRITE_CRASH
+
+rm -f $seqres.full
+
+# Set up a real filesystem for our actual test
+_scratch_mkfs -m crc=1 >> $seqres.full
+
+# Create a directory large enough to have a dir data block.  2k worth of
+# dirent names ought to do it.
+_scratch_mount
+mkdir -p $SCRATCH_MNT/fubar
+for i in $(seq 0 256 2048); do
+	fname=$(printf "%0255d" $i)
+	ln -s -f urk $SCRATCH_MNT/fubar/$fname
+done
+inum=$(stat -c '%i' $SCRATCH_MNT/fubar)
+_scratch_unmount
+
+# Fuzz the directory
+_scratch_xfs_db -x -c "inode $inum" -c "dblock 0" \
+	-c "fuzz -d bu[2].inumber add" >> $seqres.full
+
+# Try to repair the directory, force it to crash after setting needsrepair
+LIBXFS_DEBUG_WRITE_CRASH=ddev=2 _scratch_xfs_repair 2>> $seqres.full
+test $? -eq 137 || echo "repair should have been killed??"
+
+# We can't mount, right?
+_check_scratch_xfs_features NEEDSREPAIR
+_try_scratch_mount &> $tmp.mount
+res=$?
+_filter_scratch < $tmp.mount
+if [ $res -eq 0 ]; then
+	echo "Should not be able to mount after needsrepair crash"
+	_scratch_unmount
+fi
+
+# Repair properly this time and retry the mount
+_scratch_xfs_repair 2>> $seqres.full
+_check_scratch_xfs_features NEEDSREPAIR
+
+_scratch_mount
+
+# success, all done
+status=0
+exit
diff --git a/tests/xfs/768.out b/tests/xfs/768.out
new file mode 100644
index 00000000..1168ba25
--- /dev/null
+++ b/tests/xfs/768.out
@@ -0,0 +1,4 @@
+QA output created by 768
+FEATURES: NEEDSREPAIR:YES
+mount: SCRATCH_MNT: mount(2) system call failed: Structure needs cleaning.
+FEATURES: NEEDSREPAIR:NO
diff --git a/tests/xfs/770 b/tests/xfs/770
new file mode 100755
index 00000000..40e67ab5
--- /dev/null
+++ b/tests/xfs/770
@@ -0,0 +1,83 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2021 Oracle.  All Rights Reserved.
+#
+# FS QA Test No. 770
+#
+# Populate a filesystem with all types of metadata, then run repair with the
+# libxfs write failure trigger set to go after a single write.  Check that the
+# injected error trips, causing repair to abort, that needsrepair is set on the
+# fs, the kernel won't mount; and that a non-injecting repair run clears
+# needsrepair and makes the filesystem mountable again.
+#
+# Repeat with the trip point set to successively higher numbers of writes until
+# we hit ~200 writes or repair manages to run to completion without tripping.
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1    # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+	cd /
+	rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/populate
+. ./common/filter
+
+# real QA test starts here
+_supported_fs xfs
+_require_scratch_nocheck
+_require_scratch_xfs_crc		# needsrepair only exists for v5
+_require_populate_commands
+_require_libxfs_debug_flag LIBXFS_DEBUG_WRITE_CRASH
+
+rm -f $seqres.full
+
+# Populate the filesystem
+_scratch_populate_cached nofill >> $seqres.full 2>&1
+
+max_writes=200			# 200 loops should be enough for anyone
+nr_incr=$((13 / TIME_FACTOR))
+test $nr_incr -lt 1 && nr_incr=1
+for ((nr_writes = 1; nr_writes < max_writes; nr_writes += nr_incr)); do
+	# Start a repair and force it to abort after some number of writes
+	LIBXFS_DEBUG_WRITE_CRASH=ddev=$nr_writes \
+			_scratch_xfs_repair 2>> $seqres.full
+	res=$?
+	if [ $res -ne 0 ] && [ $res -ne 137 ]; then
+		echo "repair failed with $res??"
+		break
+	elif [ $res -eq 0 ]; then
+		[ $nr_writes -eq 1 ] && \
+			echo "ran to completion on the first try?"
+		break
+	fi
+
+	# Check the state of NEEDSREPAIR after repair fails.  If it isn't set
+	# but if repair -n says the fs is clean, then it's possible that the
+	# injected error caused it to abort immediately after the write that
+	# cleared NEEDSREPAIR.
+	if ! _check_scratch_xfs_features NEEDSREPAIR > /dev/null &&
+	   ! _scratch_xfs_repair -n &>> $seqres.full; then
+		echo "NEEDSREPAIR should be set on corrupt fs"
+	fi
+
+	# Repair properly this time and retry the mount
+	_scratch_xfs_repair 2>> $seqres.full
+	_check_scratch_xfs_features NEEDSREPAIR > /dev/null && \
+		echo "Repair failed to clear NEEDSREPAIR on the $nr_writes writes test"
+done
+
+# success, all done
+echo Silence is golden.
+status=0
+exit
diff --git a/tests/xfs/770.out b/tests/xfs/770.out
new file mode 100644
index 00000000..725d740b
--- /dev/null
+++ b/tests/xfs/770.out
@@ -0,0 +1,2 @@
+QA output created by 770
+Silence is golden.
diff --git a/tests/xfs/group b/tests/xfs/group
index d1b1456b..461ae2b2 100644
--- a/tests/xfs/group
+++ b/tests/xfs/group
@@ -522,3 +522,5 @@
 537 auto quick
 538 auto stress
 539 auto quick mount
+768 auto quick repair
+770 auto repair


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-04-22 11:25 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-14  1:05 [PATCHSET v4 0/1] fstests: make sure NEEDSREPAIR feature stops mounts Darrick J. Wong
2021-04-14  1:05 ` [PATCH 1/1] xfs: test that the needsrepair feature works as advertised Darrick J. Wong
2021-04-18 12:14   ` Eryu Guan
2021-04-20 18:26     ` Darrick J. Wong
2021-04-21  0:22 [PATCHSET v5 0/1] fstests: make sure NEEDSREPAIR feature stops mounts Darrick J. Wong
2021-04-21  0:22 ` [PATCH 1/1] xfs: test that the needsrepair feature works as advertised Darrick J. Wong
2021-04-21  6:01   ` Amir Goldstein
2021-04-21 15:58     ` Darrick J. Wong
2021-04-21 17:29   ` Brian Foster
2021-04-21 20:37     ` Darrick J. Wong
2021-04-22 11:25       ` Brian Foster

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).