* [PATCH 1/2] add generic/585
@ 2019-10-31 10:36 Dmitry Monakhov
2019-10-31 10:36 ` [PATCH 2/2] add generic/586 quota stress test Dmitry Monakhov
2019-11-10 16:33 ` [PATCH 1/2] add generic/585 Eryu Guan
0 siblings, 2 replies; 4+ messages in thread
From: Dmitry Monakhov @ 2019-10-31 10:36 UTC (permalink / raw)
To: fstests; +Cc: jack, Dmitry Monakhov
From: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
Quotasync may livelock if other tasks generate enough dirty dquots in parallel.
This test case performs fchown to produce dirty quotas.
This test is known to detect a livelock with non-journaled quota on kernels prior to v5.4.
Signed-off-by: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
---
src/Makefile | 2 +-
src/chowner.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++
tests/generic/585 | 94 ++++++++++++++++++++++++++++++++++++++++++++
tests/generic/585.out | 2 +
tests/generic/group | 1 +
5 files changed, 205 insertions(+), 1 deletion(-)
create mode 100644 src/chowner.c
create mode 100755 tests/generic/585
create mode 100644 tests/generic/585.out
diff --git a/src/Makefile b/src/Makefile
index ce6d861..b3ab7b4 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -28,7 +28,7 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
attr-list-by-handle-cursor-test listxattr dio-interleaved t_dir_type \
dio-invalidate-cache stat_test t_encrypted_d_revalidate \
attr_replace_test swapon mkswap t_attr_corruption t_open_tmpfiles \
- fscrypt-crypt-util bulkstat_null_ocount
+ fscrypt-crypt-util bulkstat_null_ocount chowner
SUBDIRS = log-writes perf
diff --git a/src/chowner.c b/src/chowner.c
new file mode 100644
index 0000000..043cd06
--- /dev/null
+++ b/src/chowner.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2019 YANDEX LLC. All Rights Reserved.
+// Author: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <fcntl.h>
+#include <limits.h>
+
+char *progname;
+
+void usage(void)
+{
+ printf("Usage %s: [-b uid] [-e uid] [-g] [-P] [-t timeout] FNAME\n", progname);
+ printf("\t\t -b: begin of uid range\n");
+ printf("\t\t -e: length of uid range\n");
+ printf("\t\t -g: change group uid\n");
+ printf("\t\t -t: loop timeout\n");
+ printf("\t\t -P: Do not run chowner loop, only prepare files\n");
+ exit(1);
+}
+int main(int argc, char *const *argv)
+{
+ char *filename;
+ int fd;
+ int c;
+ uid_t begin = 2000;
+ uid_t end = 12000;
+ unsigned int timeout = 10;
+ struct timeval start, now, delta = { 0, 0 };
+ uid_t uid;
+ gid_t gid = getegid();
+ int do_group = 0;
+ int do_prepare = 0;
+
+ progname = argv[0];
+ while ((c = getopt(argc, argv, "b:e:gPt:")) != -1) {
+ switch (c) {
+ case 'b':
+ begin = atoi(optarg);
+ break;
+ case 'e':
+ end = atoi(optarg);
+ break;
+ case 'g':
+ do_group = 1;
+ break;
+ case 'P':
+ do_prepare = 1;
+ break;
+ case 't':
+ timeout = atoi(optarg);
+ break;
+ default:
+ usage();
+ }
+ }
+ if (optind == argc-1)
+ filename = argv[optind];
+ else
+ usage();
+ if (do_prepare) {
+ char path[PATH_MAX];
+ for (uid = begin; uid < end;uid++) {
+ sprintf(path, "%s.%d", filename, uid);
+ fd = open(path, O_RDWR|O_CREAT, 0666);
+ if (fd < 0) {
+ perror("open");
+ exit(1);
+ }
+ if (do_group)
+ gid = uid;
+ if (fchown(fd, uid, gid)) {
+ perror("chown");
+ exit(1);
+ }
+ close(fd);
+ }
+ return 0;
+ }
+ fd = open(filename, O_RDWR|O_CREAT, 0666);
+ if (fd < 0) {
+ perror("open");
+ exit(1);
+ }
+ gettimeofday(&start, NULL);
+
+ while (1) {
+ for (uid = begin; uid < end;uid++) {
+ if (do_group)
+ gid = uid;
+ if (fchown(fd, uid, gid)) {
+ perror("chown");
+ exit(1);
+ }
+ }
+ gettimeofday(&now, NULL);
+ timersub(&now, &start, &delta);
+ if (delta.tv_sec >= timeout)
+ break;
+ }
+ return 0;
+}
diff --git a/tests/generic/585 b/tests/generic/585
new file mode 100755
index 0000000..d37522a
--- /dev/null
+++ b/tests/generic/585
@@ -0,0 +1,94 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2019 YANDEX LLC. All Rights Reserved.
+#
+# FS QA Test 585
+#
+# Check livelock during quota-sync while other tasks dirty quotas in parallel.
+# Run fchown(2) in a loop is the fastest way to produce dirty quotas
+#
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1 # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+ cd /
+ rm -f $tmp.*
+}
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/quota
+
+
+begin=2000
+end=10000
+nr_proc=$((16 * LOAD_FACTOR))
+deadline=$((100 * TIME_FACTOR))
+
+_workout()
+{
+ for ((i=0; i < nr_proc; i++))
+ do
+ # Spread files to isolated dirs to minimize locking contention
+ mkdir -p $SCRATCH_MNT/chowner/$i
+ $here/src/chowner $SCRATCH_MNT/chowner/$i/test -b $begin -e $end \
+ -t $((deadline + 10)) &
+ pids="$pids $!"
+ done
+ # Let chowners warm up ...
+ sleep 5
+ start=$(date +%s)
+ for ((i=0;i<3;i++))
+ do
+ s=$(date +%s)
+ # In normal situation command should finish in ~1sec,
+ # but in case of livelock it will spin until chowners exits
+ $*
+ e=$(date +%s)
+ echo "loop $i: $* runtime: $((e-s))" >> $seqres.full
+ sleep 2
+ done
+ end=$(date +%s)
+ runtime=$((end-start))
+ echo "DONE: $* total runtime: $runtime" >> $seqres.full
+ kill -TERM $pids 2> /dev/null
+ wait $pids
+
+ [ $runtime -le $deadline ] || \
+ _fail "Live lock detected, $* runtime: $runtime, deadline: $deadline"
+}
+
+# real QA test starts here
+_supported_fs generic
+_supported_os Linux
+_require_quota
+#_require_user
+_require_scratch
+_require_command "$KILLALL_PROG" killall
+
+rm -f $seqres.full
+_scratch_mkfs_sized $((512 * 1024 * 1024)) >> $seqres.full 2>&1
+_scratch_mount "-o quota,user"
+chmod 777 $SCRATCH_MNT
+quotacheck -u $SCRATCH_MNT 2>/dev/null
+quotaon -v -u $SCRATCH_MNT >> $seqres.full 2>&1
+
+# Preparation step: Create all files with uid in range
+# to cache quota in kernel memory
+mkdir -p $SCRATCH_MNT/q
+$here/src/chowner $SCRATCH_MNT/q -b $begin -e $end -P
+
+_workout quotasync $SCRATCH_MNT
+_workout sync
+
+echo "Silence is golden"
+# success, all done
+status=0
+exit
diff --git a/tests/generic/585.out b/tests/generic/585.out
new file mode 100644
index 0000000..e4dd43b
--- /dev/null
+++ b/tests/generic/585.out
@@ -0,0 +1,2 @@
+QA output created by 585
+Silence is golden
diff --git a/tests/generic/group b/tests/generic/group
index 42ca2b9..703a1b4 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -587,3 +587,4 @@
582 auto quick encrypt
583 auto quick encrypt
584 auto quick encrypt
+585 auto quota rw stress
--
2.7.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] add generic/586 quota stress test
2019-10-31 10:36 [PATCH 1/2] add generic/585 Dmitry Monakhov
@ 2019-10-31 10:36 ` Dmitry Monakhov
2019-11-10 16:33 ` [PATCH 1/2] add generic/585 Eryu Guan
1 sibling, 0 replies; 4+ messages in thread
From: Dmitry Monakhov @ 2019-10-31 10:36 UTC (permalink / raw)
To: fstests; +Cc: jack, Dmitry Monakhov
From: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
Regression test for mark_dquot_dirty() vs dqput() race
Stress test the quota code via an fchown(2) loop. In fact, fchown
is the best way to produce a lot of short-lived dirty dquot objects.
This test is known to produce a non-fatal dmesg error on kernels prior to v5.4
Example: "Quota error (device vdb): dqput: Can't write quota structure (error -5). Quota may get out of sync!"
Signed-off-by: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
---
tests/generic/586 | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++
tests/generic/586.out | 2 ++
tests/generic/group | 1 +
3 files changed, 90 insertions(+)
create mode 100755 tests/generic/586
create mode 100755 tests/generic/586.out
diff --git a/tests/generic/586 b/tests/generic/586
new file mode 100755
index 0000000..279eb55
--- /dev/null
+++ b/tests/generic/586
@@ -0,0 +1,87 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2019 YANDEX LLC. All Rights Reserved.
+#
+# FS QA Test 586
+#
+# Stress quotasync while other tasks dirty quotas in parallel.
+# Run fchown(2) in a loop is the fastest way to produce dirty quotas
+#
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1 # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+ cd /
+ rm -f $tmp.*
+}
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/quota
+
+
+
+
+# real QA test starts here
+_supported_fs generic
+_supported_os Linux
+_require_quota
+_require_scratch
+_require_check_dmesg
+
+nr_proc=$((32 * LOAD_FACTOR))
+runtime=$((60 * TIME_FACTOR))
+
+rm -f $seqres.full
+_scratch_mkfs_sized $((512 * 1024 * 1024)) >> $seqres.full 2>&1
+_scratch_mount "-o quota,user"
+chmod 777 $SCRATCH_MNT
+quotacheck -u $SCRATCH_MNT 2>/dev/null
+quotaon -v -u $SCRATCH_MNT >> $seqres.full 2>&1
+
+# Preparation step: Create all files with uid in range
+# to cache quota in kernel memory
+#mkdir -p $SCRATCH_MNT/q
+#$here/src/chowner $SCRATCH_MNT/q -b 2000 -e 2010 -P
+
+for ((i=0; i < nr_proc; i++))
+do
+ # Spread files to isolated dirs to minimize locking contention
+ mkdir -p $SCRATCH_MNT/chowner/$i
+ # Performs fchown in small uid range [2019,2042] in order stress
+ # quota code
+ $here/src/chowner $SCRATCH_MNT/chowner/$i/test -b 2019 -e 2042 \
+ -t $((runtime)) &
+ pids="$pids $!"
+done
+
+start=$(date +%s)
+deadline=$((start + runtime))
+i=0
+while true
+do
+ quotasync -u $SCRATCH_MNT
+ now=$(date +%s)
+ [ $now -le $deadline ] || break
+ i=$((i+1))
+done
+echo "Done after $i loops" >> $seqres.full
+
+kill -TERM $pids 2> /dev/null
+wait $pids
+
+# Check for error message that happed due to quota inconsistency
+# Example messages:
+# "Quota error (device vdb): dqput: Can't write quota structure (error -5). Quota may get out of sync!"
+_dmesg_since_test_start | egrep -e 'Quota error '
+echo "Silence is golden"
+# success, all done
+status=0
+exit
diff --git a/tests/generic/586.out b/tests/generic/586.out
new file mode 100755
index 0000000..3d36442
--- /dev/null
+++ b/tests/generic/586.out
@@ -0,0 +1,2 @@
+QA output created by 586
+Silence is golden
diff --git a/tests/generic/group b/tests/generic/group
index 703a1b4..76d7d4f 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -588,3 +588,4 @@
583 auto quick encrypt
584 auto quick encrypt
585 auto quota rw stress
+586 auto quota rw stress
--
2.7.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 1/2] add generic/585
2019-10-31 10:36 [PATCH 1/2] add generic/585 Dmitry Monakhov
2019-10-31 10:36 ` [PATCH 2/2] add generic/586 quota stress test Dmitry Monakhov
@ 2019-11-10 16:33 ` Eryu Guan
2019-11-11 7:20 ` Dmitry Monakhov
1 sibling, 1 reply; 4+ messages in thread
From: Eryu Guan @ 2019-11-10 16:33 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: fstests, jack, Dmitry Monakhov
On Thu, Oct 31, 2019 at 10:36:38AM +0000, Dmitry Monakhov wrote:
> From: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
>
> Quotasync may livelock if others tasks generate enough dirty dquots in parallel
> This test case pefrorm fchown to produce dirty quotas
>
> This test known to detect livelock non-journaled quota for kernels prior to v5.4
>
> Signed-off-by: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
> ---
> src/Makefile | 2 +-
> src/chowner.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++
Missing an entry in .gitignore
> tests/generic/585 | 94 ++++++++++++++++++++++++++++++++++++++++++++
> tests/generic/585.out | 2 +
> tests/generic/group | 1 +
> 5 files changed, 205 insertions(+), 1 deletion(-)
> create mode 100644 src/chowner.c
> create mode 100755 tests/generic/585
> create mode 100644 tests/generic/585.out
>
> diff --git a/src/Makefile b/src/Makefile
> index ce6d861..b3ab7b4 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -28,7 +28,7 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
> attr-list-by-handle-cursor-test listxattr dio-interleaved t_dir_type \
> dio-invalidate-cache stat_test t_encrypted_d_revalidate \
> attr_replace_test swapon mkswap t_attr_corruption t_open_tmpfiles \
> - fscrypt-crypt-util bulkstat_null_ocount
> + fscrypt-crypt-util bulkstat_null_ocount chowner
>
> SUBDIRS = log-writes perf
>
> diff --git a/src/chowner.c b/src/chowner.c
> new file mode 100644
> index 0000000..043cd06
> --- /dev/null
> +++ b/src/chowner.c
> @@ -0,0 +1,107 @@
> +// SPDX-License-Identifier: GPL-2.0
> +//
> +// Copyright (c) 2019 YANDEX LLC. All Rights Reserved.
> +// Author: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
> +
> +#include <unistd.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <time.h>
> +#include <sys/time.h>
> +#include <fcntl.h>
> +#include <limits.h>
> +
> +char *progname;
> +
> +void usage(void)
> +{
> + printf("Usage %s: [-b uid] [-e uid] [-g] [-P] [-t timeout] FNAME\n", progname);
> + printf("\t\t -b: begin of uid range\n");
> + printf("\t\t -e: length of uid range\n");
^^^^^^ should be end of uid range?
> + printf("\t\t -g: change group uid\n");
> + printf("\t\t -t: loop timeout\n");
> + printf("\t\t -P: Do not run chowner loop, only prepare files\n");
> + exit(1);
> +}
> +int main(int argc, char *const *argv)
> +{
> + char *filename;
> + int fd;
> + int c;
> + uid_t begin = 2000;
> + uid_t end = 12000;
> + unsigned int timeout = 10;
> + struct timeval start, now, delta = { 0, 0 };
> + uid_t uid;
> + gid_t gid = getegid();
> + int do_group = 0;
> + int do_prepare = 0;
> +
> + progname = argv[0];
> + while ((c = getopt(argc, argv, "b:e:gPt:")) != -1) {
> + switch (c) {
> + case 'b':
> + begin = atoi(optarg);
> + break;
> + case 'e':
> + end = atoi(optarg);
> + break;
> + case 'g':
> + do_group = 1;
> + break;
> + case 'P':
> + do_prepare = 1;
> + break;
> + case 't':
> + timeout = atoi(optarg);
> + break;
> + default:
> + usage();
> + }
> + }
> + if (optind == argc-1)
> + filename = argv[optind];
> + else
> + usage();
> + if (do_prepare) {
> + char path[PATH_MAX];
> + for (uid = begin; uid < end;uid++) {
> + sprintf(path, "%s.%d", filename, uid);
So in the do_prepare case, is filename expected to be a filename prefix? Or a
directory? From the test code, it seems to be a dir, but this C code
treats it as a filename prefix.
# Preparation step: Create all files with uid in range
# to cache quota in kernel memory
mkdir -p $SCRATCH_MNT/q
$here/src/chowner $SCRATCH_MNT/q -b $begin -e $end -P
> + fd = open(path, O_RDWR|O_CREAT, 0666);
> + if (fd < 0) {
> + perror("open");
> + exit(1);
> + }
> + if (do_group)
> + gid = uid;
> + if (fchown(fd, uid, gid)) {
> + perror("chown");
> + exit(1);
> + }
> + close(fd);
> + }
> + return 0;
> + }
> + fd = open(filename, O_RDWR|O_CREAT, 0666);
> + if (fd < 0) {
> + perror("open");
> + exit(1);
> + }
> + gettimeofday(&start, NULL);
> +
> + while (1) {
> + for (uid = begin; uid < end;uid++) {
> + if (do_group)
> + gid = uid;
> + if (fchown(fd, uid, gid)) {
> + perror("chown");
> + exit(1);
> + }
> + }
> + gettimeofday(&now, NULL);
> + timersub(&now, &start, &delta);
> + if (delta.tv_sec >= timeout)
> + break;
> + }
> + return 0;
> +}
> diff --git a/tests/generic/585 b/tests/generic/585
> new file mode 100755
> index 0000000..d37522a
> --- /dev/null
> +++ b/tests/generic/585
> @@ -0,0 +1,94 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2019 YANDEX LLC. All Rights Reserved.
> +#
> +# FS QA Test 585
> +#
> +# Check livelock during quota-sync while other tasks dirty quotas in parallel.
> +# Run fchown(2) in a loop is the fastest way to produce dirty quotas
> +#
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1 # failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> + cd /
> + rm -f $tmp.*
> +}
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +. ./common/quota
> +
> +
> +begin=2000
> +end=10000
> +nr_proc=$((16 * LOAD_FACTOR))
> +deadline=$((100 * TIME_FACTOR))
So this test has two "_workout"s, and each one takes around 120s, so the whole
test takes around 4 minutes, which seems a bit long for a generic test.
What's the reproducibility if you lower the runtime? It'd be good to find
a good balance between reproducibility and test run time. e.g. 20s for
60% reproducibility is better than 240s for 90%.
> +
> +_workout()
Local functions don't need the "_" prefix.
> +{
> + for ((i=0; i < nr_proc; i++))
> + do
Use tabs for indentation. And the indentation is not consistent: some lines
use tabs and some lines use spaces.
> + # Spread files to isolated dirs to minimize locking contention
> + mkdir -p $SCRATCH_MNT/chowner/$i
> + $here/src/chowner $SCRATCH_MNT/chowner/$i/test -b $begin -e $end \
> + -t $((deadline + 10)) &
> + pids="$pids $!"
> + done
> + # Let chowners warm up ...
> + sleep 5
> + start=$(date +%s)
> + for ((i=0;i<3;i++))
> + do
> + s=$(date +%s)
> + # In normal situation command should finish in ~1sec,
> + # but in case of livelock it will spin until chowners exits
> + $*
> + e=$(date +%s)
> + echo "loop $i: $* runtime: $((e-s))" >> $seqres.full
> + sleep 2
> + done
> + end=$(date +%s)
> + runtime=$((end-start))
> + echo "DONE: $* total runtime: $runtime" >> $seqres.full
> + kill -TERM $pids 2> /dev/null
> + wait $pids
> +
> + [ $runtime -le $deadline ] || \
> + _fail "Live lock detected, $* runtime: $runtime, deadline: $deadline"
> +}
> +
> +# real QA test starts here
> +_supported_fs generic
> +_supported_os Linux
> +_require_quota
> +#_require_user
Could be removed.
> +_require_scratch
> +_require_command "$KILLALL_PROG" killall
killall is not used, can be removed.
And we need
_require_test_program "chowner"
> +
> +rm -f $seqres.full
> +_scratch_mkfs_sized $((512 * 1024 * 1024)) >> $seqres.full 2>&1
Any reason to use sized mkfs?
> +_scratch_mount "-o quota,user"
> +chmod 777 $SCRATCH_MNT
> +quotacheck -u $SCRATCH_MNT 2>/dev/null
> +quotaon -v -u $SCRATCH_MNT >> $seqres.full 2>&1
Above steps could be replaced by:
_qmount_option "usrquota"
_qmount
Thanks,
Eryu
> +
> +# Preparation step: Create all files with uid in range
> +# to cache quota in kernel memory
> +mkdir -p $SCRATCH_MNT/q
> +$here/src/chowner $SCRATCH_MNT/q -b $begin -e $end -P
> +
> +_workout quotasync $SCRATCH_MNT
> +_workout sync
> +
> +echo "Silence is golden"
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/generic/585.out b/tests/generic/585.out
> new file mode 100644
> index 0000000..e4dd43b
> --- /dev/null
> +++ b/tests/generic/585.out
> @@ -0,0 +1,2 @@
> +QA output created by 585
> +Silence is golden
> diff --git a/tests/generic/group b/tests/generic/group
> index 42ca2b9..703a1b4 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -587,3 +587,4 @@
> 582 auto quick encrypt
> 583 auto quick encrypt
> 584 auto quick encrypt
> +585 auto quota rw stress
> --
> 2.7.4
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 1/2] add generic/585
2019-11-10 16:33 ` [PATCH 1/2] add generic/585 Eryu Guan
@ 2019-11-11 7:20 ` Dmitry Monakhov
0 siblings, 0 replies; 4+ messages in thread
From: Dmitry Monakhov @ 2019-11-11 7:20 UTC (permalink / raw)
To: Eryu Guan; +Cc: fstests, jack
Eryu Guan <guaneryu@gmail.com> writes:
> On Thu, Oct 31, 2019 at 10:36:38AM +0000, Dmitry Monakhov wrote:
>> From: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
>>
>> Quotasync may livelock if others tasks generate enough dirty dquots in parallel
>> This test case pefrorm fchown to produce dirty quotas
>>
>> This test known to detect livelock non-journaled quota for kernels prior to v5.4
>>
>> Signed-off-by: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
>> ---
>> src/Makefile | 2 +-
>> src/chowner.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++
>
> Missing an entry in .gitingore
>
>> tests/generic/585 | 94 ++++++++++++++++++++++++++++++++++++++++++++
>> tests/generic/585.out | 2 +
>> tests/generic/group | 1 +
>> 5 files changed, 205 insertions(+), 1 deletion(-)
>> create mode 100644 src/chowner.c
>> create mode 100755 tests/generic/585
>> create mode 100644 tests/generic/585.out
>>
>> diff --git a/src/Makefile b/src/Makefile
>> index ce6d861..b3ab7b4 100644
>> --- a/src/Makefile
>> +++ b/src/Makefile
>> @@ -28,7 +28,7 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
>> attr-list-by-handle-cursor-test listxattr dio-interleaved t_dir_type \
>> dio-invalidate-cache stat_test t_encrypted_d_revalidate \
>> attr_replace_test swapon mkswap t_attr_corruption t_open_tmpfiles \
>> - fscrypt-crypt-util bulkstat_null_ocount
>> + fscrypt-crypt-util bulkstat_null_ocount chowner
>>
>> SUBDIRS = log-writes perf
>>
>> diff --git a/src/chowner.c b/src/chowner.c
>> new file mode 100644
>> index 0000000..043cd06
>> --- /dev/null
>> +++ b/src/chowner.c
>> @@ -0,0 +1,107 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +//
>> +// Copyright (c) 2019 YANDEX LLC. All Rights Reserved.
>> +// Author: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
>> +
>> +#include <unistd.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <time.h>
>> +#include <sys/time.h>
>> +#include <fcntl.h>
>> +#include <limits.h>
>> +
>> +char *progname;
>> +
>> +void usage(void)
>> +{
>> + printf("Usage %s: [-b uid] [-e uid] [-g] [-P] [-t timeout] FNAME\n", progname);
>> + printf("\t\t -b: begin of uid range\n");
>> + printf("\t\t -e: length of uid range\n");
> ^^^^^^ should be end of uid range?
>
>> + printf("\t\t -g: change group uid\n");
>> + printf("\t\t -t: loop timeout\n");
>> + printf("\t\t -P: Do not run chowner loop, only prepare files\n");
>> + exit(1);
>> +}
>> +int main(int argc, char *const *argv)
>> +{
>> + char *filename;
>> + int fd;
>> + int c;
>> + uid_t begin = 2000;
>> + uid_t end = 12000;
>> + unsigned int timeout = 10;
>> + struct timeval start, now, delta = { 0, 0 };
>> + uid_t uid;
>> + gid_t gid = getegid();
>> + int do_group = 0;
>> + int do_prepare = 0;
>> +
>> + progname = argv[0];
>> + while ((c = getopt(argc, argv, "b:e:gPt:")) != -1) {
>> + switch (c) {
>> + case 'b':
>> + begin = atoi(optarg);
>> + break;
>> + case 'e':
>> + end = atoi(optarg);
>> + break;
>> + case 'g':
>> + do_group = 1;
>> + break;
>> + case 'P':
>> + do_prepare = 1;
>> + break;
>> + case 't':
>> + timeout = atoi(optarg);
>> + break;
>> + default:
>> + usage();
>> + }
>> + }
>> + if (optind == argc-1)
>> + filename = argv[optind];
>> + else
>> + usage();
>> + if (do_prepare) {
>> + char path[PATH_MAX];
>> + for (uid = begin; uid < end;uid++) {
>> + sprintf(path, "%s.%d", filename, uid);
>
> So in do_prepare case, expect filename to be a filename prefix? Or a
> directory? From the test code, it seems to be a dir, but this c code
> treats as an filename prefix.
>
> # Preparation step: Create all files with uid in range
> # to cache quota in kernel memory
> mkdir -p $SCRATCH_MNT/q
> $here/src/chowner $SCRATCH_MNT/q -b $begin -e $end -P
>
>> + fd = open(path, O_RDWR|O_CREAT, 0666);
>> + if (fd < 0) {
>> + perror("open");
>> + exit(1);
>> + }
>> + if (do_group)
>> + gid = uid;
>> + if (fchown(fd, uid, gid)) {
>> + perror("chown");
>> + exit(1);
>> + }
>> + close(fd);
>> + }
>> + return 0;
>> + }
>> + fd = open(filename, O_RDWR|O_CREAT, 0666);
>> + if (fd < 0) {
>> + perror("open");
>> + exit(1);
>> + }
>> + gettimeofday(&start, NULL);
>> +
>> + while (1) {
>> + for (uid = begin; uid < end;uid++) {
>> + if (do_group)
>> + gid = uid;
>> + if (fchown(fd, uid, gid)) {
>> + perror("chown");
>> + exit(1);
>> + }
>> + }
>> + gettimeofday(&now, NULL);
>> + timersub(&now, &start, &delta);
>> + if (delta.tv_sec >= timeout)
>> + break;
>> + }
>> + return 0;
>> +}
>> diff --git a/tests/generic/585 b/tests/generic/585
>> new file mode 100755
>> index 0000000..d37522a
>> --- /dev/null
>> +++ b/tests/generic/585
>> @@ -0,0 +1,94 @@
>> +#! /bin/bash
>> +# SPDX-License-Identifier: GPL-2.0
>> +# Copyright (c) 2019 YANDEX LLC. All Rights Reserved.
>> +#
>> +# FS QA Test 585
>> +#
>> +# Check livelock during quota-sync while other tasks dirty quotas in parallel.
>> +# Run fchown(2) in a loop is the fastest way to produce dirty quotas
>> +#
>> +seq=`basename $0`
>> +seqres=$RESULT_DIR/$seq
>> +echo "QA output created by $seq"
>> +
>> +here=`pwd`
>> +tmp=/tmp/$$
>> +status=1 # failure is the default!
>> +trap "_cleanup; exit \$status" 0 1 2 3 15
>> +
>> +_cleanup()
>> +{
>> + cd /
>> + rm -f $tmp.*
>> +}
>> +# get standard environment, filters and checks
>> +. ./common/rc
>> +. ./common/filter
>> +. ./common/quota
>> +
>> +
>> +begin=2000
>> +end=10000
>> +nr_proc=$((16 * LOAD_FACTOR))
>> +deadline=$((100 * TIME_FACTOR))
>
> So this test has two "_workout"s, and one takes around 120s, so whole
> test takes around 4 minutes, which seems a bit long for a generic test.
>
> What's the reproducibility if you lower the runtime? I'd be good to find
> a good banalce between reproducibility and test run time. e.g. 20s for
> 60% reproducibility is better than 240s for 90%.
This happens only when a deadlock is detected; normally it will
take ~25-30sec. I can lower this number to 20sec.
All other notes are valid and will be fixed in v2.
>
>> +
>> +_workout()
>
> Local functions doesn't need "_" prefix.
>
>> +{
>> + for ((i=0; i < nr_proc; i++))
>> + do
>
> Use tab for indention. And the indentions are not consistent, some lines
> use tab and some lines use spaces.
>
>> + # Spread files to isolated dirs to minimize locking contention
>> + mkdir -p $SCRATCH_MNT/chowner/$i
>> + $here/src/chowner $SCRATCH_MNT/chowner/$i/test -b $begin -e $end \
>> + -t $((deadline + 10)) &
>> + pids="$pids $!"
>> + done
>> + # Let chowners warm up ...
>> + sleep 5
>> + start=$(date +%s)
>> + for ((i=0;i<3;i++))
>> + do
>> + s=$(date +%s)
>> + # In normal situation command should finish in ~1sec,
>> + # but in case of livelock it will spin until chowners exits
>> + $*
>> + e=$(date +%s)
>> + echo "loop $i: $* runtime: $((e-s))" >> $seqres.full
>> + sleep 2
>> + done
>> + end=$(date +%s)
>> + runtime=$((end-start))
>> + echo "DONE: $* total runtime: $runtime" >> $seqres.full
>> + kill -TERM $pids 2> /dev/null
>> + wait $pids
>> +
>> + [ $runtime -le $deadline ] || \
>> + _fail "Live lock detected, $* runtime: $runtime, deadline: $deadline"
>> +}
>> +
>> +# real QA test starts here
>> +_supported_fs generic
>> +_supported_os Linux
>> +_require_quota
>> +#_require_user
>
> Could be removed.
>
>> +_require_scratch
>> +_require_command "$KILLALL_PROG" killall
>
> killall is not used, can be removed.
>
> And we need
>
> _require_test_program "chowner"
>
>> +
>> +rm -f $seqres.full
>> +_scratch_mkfs_sized $((512 * 1024 * 1024)) >> $seqres.full 2>&1
>
> Any reason to use sized mkfs?
>
>> +_scratch_mount "-o quota,user"
>> +chmod 777 $SCRATCH_MNT
>> +quotacheck -u $SCRATCH_MNT 2>/dev/null
>> +quotaon -v -u $SCRATCH_MNT >> $seqres.full 2>&1
>
> Above steps could be replaced by:
>
> _qmount_option "usrquota"
> _qmount
>
> Thanks,
> Eryu
>
>> +
>> +# Preparation step: Create all files with uid in range
>> +# to cache quota in kernel memory
>> +mkdir -p $SCRATCH_MNT/q
>> +$here/src/chowner $SCRATCH_MNT/q -b $begin -e $end -P
>> +
>> +_workout quotasync $SCRATCH_MNT
>> +_workout sync
>> +
>> +echo "Silence is golden"
>> +# success, all done
>> +status=0
>> +exit
>> diff --git a/tests/generic/585.out b/tests/generic/585.out
>> new file mode 100644
>> index 0000000..e4dd43b
>> --- /dev/null
>> +++ b/tests/generic/585.out
>> @@ -0,0 +1,2 @@
>> +QA output created by 585
>> +Silence is golden
>> diff --git a/tests/generic/group b/tests/generic/group
>> index 42ca2b9..703a1b4 100644
>> --- a/tests/generic/group
>> +++ b/tests/generic/group
>> @@ -587,3 +587,4 @@
>> 582 auto quick encrypt
>> 583 auto quick encrypt
>> 584 auto quick encrypt
>> +585 auto quota rw stress
>> --
>> 2.7.4
>>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2019-11-11 7:20 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-31 10:36 [PATCH 1/2] add generic/585 Dmitry Monakhov
2019-10-31 10:36 ` [PATCH 2/2] add generic/586 quota stress test Dmitry Monakhov
2019-11-10 16:33 ` [PATCH 1/2] add generic/585 Eryu Guan
2019-11-11 7:20 ` Dmitry Monakhov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).