All of lore.kernel.org
 help / color / mirror / Atom feed
* intermittent failure, s390 host, x86-64 guest, vhost-user-blk test
@ 2021-08-04 12:40 Peter Maydell
  2021-08-04 13:43 ` Michael S. Tsirkin
  0 siblings, 1 reply; 6+ messages in thread
From: Peter Maydell @ 2021-08-04 12:40 UTC (permalink / raw)
  To: QEMU Developers; +Cc: Michael S. Tsirkin

Saw this intermittent as part of my ad-hoc CI test setup.
The backtrace says that the QEMU process has somehow hung
during 'realize' of the vhost-user device...

Process tree:

ubuntu   59552  0.0  0.0   5460  3120 ?        S    06:51   0:00
           \_ bash -o pipefail -c echo
'MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
QTEST_QEMU_IMG=./qemu-img
G_TEST_DBUS_DAEMON=/home/ubuntu/qemu/tests/dbus-vmstate-daemon.sh
QTEST_QEMU_BINARY=./qemu-system-x86_64
QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
tests/qtest/qos-test --tap -k' &&
MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
QTEST_QEMU_IMG=./qemu-img
G_TEST_DBUS_DAEMON=/home/ubuntu/qemu/tests/dbus-vmstate-daemon.sh
QTEST_QEMU_BINARY=./qemu-system-x86_64
QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
tests/qtest/qos-test --tap -k -m quick < /dev/null |
./scripts/tap-driver.pl --test-name="qtest-x86_64/qos-test"
ubuntu   59553  0.0  0.2  88008  8332 ?        Sl   06:51   0:00
               \_ tests/qtest/qos-test --tap -k -m quick
ubuntu   60122  0.0  0.5 254432 21516 ?        Sl   06:51   0:00
               |   \_ ./storage-daemon/qemu-storage-daemon --blockdev
driver=file,node-name=disk0,filename=qtest.trJnuR --export
type=vhost-user-blk,id=disk0,addr.type=unix,addr.path=/tmp/qtest-59553-sock.iRt6WL,node-name=disk0,writable=on,num-queues=1
ubuntu   60123  0.0  1.6 747952 62732 ?        Sl   06:51   0:00
               |   \_ ./qemu-system-x86_64 -qtest
unix:/tmp/qtest-59553.sock -qtest-log /dev/null -chardev
socket,path=/tmp/qtest-59553.qmp,id=char0 -mon
chardev=char0,mode=control -display none -M pc -device
vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
memory-backend-memfd,id=mem,size=256M,share=on -M memory-backend=mem
-m 256M -chardev socket,id=char1,path=/tmp/qtest-59553-sock.iRt6WL
-accel qtest
ubuntu   59554  0.0  0.2  14116 11204 ?        S    06:51   0:00
               \_ perl ./scripts/tap-driver.pl
--test-name=qtest-x86_64/qos-test

Backtrace, qemu process:

Thread 4 (Thread 0x3ff5be1d910 (LWP 60131)):
#0  0x000003ff7e2bed6a in __GI___sigtimedwait (set=<optimized out>,
    set@entry=0x3ff5be1a098, info=info@entry=0x3ff5be19f68,
timeout=timeout@entry=0x0)
    at ../sysdeps/unix/sysv/linux/sigtimedwait.c:42
#1  0x000003ff7e492ca8 in __sigwait (set=set@entry=0x3ff5be1a098,
sig=sig@entry=0x3ff5be1a094)
    at ../sysdeps/unix/sysv/linux/sigwait.c:28
#2  0x000002aa16891ab2 in dummy_cpu_thread_fn
(arg=arg@entry=0x2aa182103e0) at ../../accel/dummy-cpus.c:46
#3  0x000002aa16b22f5a in qemu_thread_start (args=<optimized out>) at
../../util/qemu-thread-posix.c:541
#4  0x000003ff7e487aa8 in start_thread (arg=0x3ff5be1d910) at
pthread_create.c:463
#5  0x000003ff7e37a11e in thread_start () at
../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65

Thread 3 (Thread 0x3ff608b4910 (LWP 60130)):
#0  0x000003ff7e36f1b0 in __GI___poll (fds=0x3ff540031f0, nfds=3,
timeout=<optimized out>) at ../sysdeps/unix/sysv/linux/poll.c:29
#1  0x000003ff7f9d2624 in  () at /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#2  0x000003ff7f9d2aa8 in g_main_loop_run () at
/usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#3  0x000002aa1695ad26 in iothread_run
(opaque=opaque@entry=0x2aa17ea2b80) at ../../iothread.c:73
#4  0x000002aa16b22f5a in qemu_thread_start (args=<optimized out>) at
../../util/qemu-thread-posix.c:541
#5  0x000003ff7e487aa8 in start_thread (arg=0x3ff608b4910) at
pthread_create.c:463
#6  0x000003ff7e37a11e in thread_start () at
../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65

Thread 2 (Thread 0x3ff711b7910 (LWP 60129)):
#0  0x000003ff7e4911f6 in __lll_lock_wait
(futex=futex@entry=0x2aa17466bb8 <qemu_global_mutex>,
private=<optimized out>)
    at lowlevellock.c:46
#1  0x000003ff7e48a204 in __GI___pthread_mutex_lock
(mutex=mutex@entry=0x2aa17466bb8 <qemu_global_mutex>)
    at ../nptl/pthread_mutex_lock.c:80
#2  0x000002aa16b23116 in qemu_mutex_lock_impl (mutex=0x2aa17466bb8
<qemu_global_mutex>, file=0x2aa170a07d8 "../../util/rcu.c",
line=<optimized out>) at ../../util/qemu-thread-posix.c:79
#3  0x000002aa168221c0 in qemu_mutex_lock_iothread_impl
(file=file@entry=0x2aa170a07d8 "../../util/rcu.c",
line=line@entry=266)
    at ../../softmmu/cpus.c:493
#4  0x000002aa16b4c8aa in call_rcu_thread (opaque=opaque@entry=0x0) at
../../util/rcu.c:266
#5  0x000002aa16b22f5a in qemu_thread_start (args=<optimized out>) at
../../util/qemu-thread-posix.c:541
#6  0x000003ff7e487aa8 in start_thread (arg=0x3ff711b7910) at
pthread_create.c:463
#7  0x000003ff7e37a11e in thread_start () at
../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65

Thread 1 (Thread 0x3ff81035470 (LWP 60123)):
#0  0x000003ff7e36f1b0 in __GI___poll (fds=0x2aa18147da0, nfds=2,
timeout=<optimized out>) at ../sysdeps/unix/sysv/linux/poll.c:29
#1  0x000003ff7f9d2624 in  () at /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#2  0x000003ff7f9d2aa8 in g_main_loop_run () at
/usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#3  0x000002aa169104de in vhost_user_read
(dev=dev@entry=0x2aa18f7afc8, msg=msg@entry=0x3ffc167ddfc)
    at ../../hw/virtio/vhost-user.c:402
#4  0x000002aa1691186e in vhost_user_get_u64
(dev=dev@entry=0x2aa18f7afc8, request=request@entry=1,
u64=u64@entry=0x3ffc167e138)
    at ../../hw/virtio/vhost-user.c:1334
#5  0x000002aa16913388 in vhost_user_get_features
(features=0x3ffc167e138, dev=0x2aa18f7afc8) at
../../hw/virtio/vhost-user.c:1356
#6  0x000002aa16913388 in vhost_user_backend_init (dev=0x2aa18f7afc8,
opaque=0x2aa18f7b240, errp=0x3ffc167e470)
    at ../../hw/virtio/vhost-user.c:1877
#7  0x000002aa1684030c in vhost_dev_init
(hdev=hdev@entry=0x2aa18f7afc8, opaque=opaque@entry=0x2aa18f7b240,
backend_type=backend_type@entry=VHOST_BACKEND_TYPE_USER,
busyloop_timeout=busyloop_timeout@entry=0,
errp=errp@entry=0x3ffc167e470)
    at ../../hw/virtio/vhost.c:1302
#8  0x000002aa168be742 in vhost_user_blk_connect
(dev=dev@entry=0x2aa18f7ad78, errp=errp@entry=0x3ffc167e470)
    at ../../hw/block/vhost-user-blk.c:339
#9  0x000002aa168bec44 in vhost_user_blk_realize_connect
(errp=0x3ffc167e470, s=0x2aa18f7ad78) at
../../hw/block/vhost-user-blk.c:440
#10 0x000002aa168bec44 in vhost_user_blk_device_realize
(dev=0x2aa18f7ad78, errp=0x3ffc167e470)
    at ../../hw/block/vhost-user-blk.c:513
#11 0x000002aa16819e78 in virtio_device_realize (dev=0x2aa18f7ad78,
errp=0x3ffc167e530) at ../../hw/virtio/virtio.c:3659
#12 0x000002aa169fcc50 in device_set_realized (obj=<optimized out>,
value=<optimized out>, errp=0x3ffc167e690)
    at ../../hw/core/qdev.c:761
#13 0x000002aa169229ba in property_set_bool
(obj=obj@entry=0x2aa18f7ad78, v=v@entry=
    0x2aa18fc41e0, name=name@entry=0x2aa16ccfd74 "realized",
opaque=0x2aa17f2a7a0, errp=errp@entry=0x3ffc167e690)
    at ../../qom/object.c:2258
#14 0x000002aa16925534 in object_property_set
(obj=obj@entry=0x2aa18f7ad78, name=name@entry=0x2aa16ccfd74
"realized", v=v@entry=
    0x2aa18fc41e0, errp=errp@entry=0x3ffc167e980) at ../../qom/object.c:1403
#15 0x000002aa1692229c in object_property_set_qobject
(obj=obj@entry=0x2aa18f7ad78, name=name@entry=0x2aa16ccfd74
"realized", value=value@entry=0x2aa18fc4120,
errp=errp@entry=0x3ffc167e980) at ../../qom/qom-qobject.c:28
#16 0x000002aa169257e2 in object_property_set_bool (obj=0x2aa18f7ad78,
name=0x2aa16ccfd74 "realized", value=<optimized out>,
errp=0x3ffc167e980) at ../../qom/object.c:1473
#17 0x000002aa1677e9c2 in virtio_pci_realize (pci_dev=0x2aa18f72b80,
errp=0x3ffc167e980) at ../../hw/virtio/virtio-pci.c:1907
#18 0x000002aa165dfc48 in pci_qdev_realize (qdev=0x2aa18f72b80,
errp=<optimized out>) at ../../hw/pci/pci.c:2148
#19 0x000002aa169fcc50 in device_set_realized (obj=<optimized out>,
value=<optimized out>, errp=0x3ffc167ebe8)
    at ../../hw/core/qdev.c:761
#20 0x000002aa169229ba in property_set_bool
(obj=obj@entry=0x2aa18f72b80, v=v@entry=
    0x2aa18fbe450, name=name@entry=0x2aa16ccfd74 "realized",
opaque=0x2aa17f2a7a0, errp=errp@entry=0x3ffc167ebe8)
    at ../../qom/object.c:2258
#21 0x000002aa16925534 in object_property_set
(obj=obj@entry=0x2aa18f72b80, name=name@entry=0x2aa16ccfd74
"realized", v=v@entry=
    0x2aa18fbe450, errp=errp@entry=0x2aa17484fe0 <error_fatal>) at
../../qom/object.c:1403
#22 0x000002aa1692229c in object_property_set_qobject
(obj=obj@entry=0x2aa18f72b80, name=name@entry=0x2aa16ccfd74
"realized", value=value@entry=0x2aa18fbe580,
errp=errp@entry=0x2aa17484fe0 <error_fatal>) at
../../qom/qom-qobject.c:28
#23 0x000002aa169257e2 in object_property_set_bool (obj=0x2aa18f72b80,
name=name@entry=0x2aa16ccfd74 "realized", value=value@entry=true,
errp=0x2aa17484fe0 <error_fatal>, errp@entry=<error reading variable:
value has been optimized out>) at ../../qom/object.c:1473
#24 0x000002aa169fb87e in qdev_realize (dev=<optimized out>,
bus=bus@entry=0x2aa18384200, errp=<error reading variable: value has
been optimized out>) at ../../hw/core/qdev.c:389
#25 0x000002aa165f1c6c in qdev_device_add (opts=<optimized out>,
errp=<optimized out>, errp@entry=0x2aa17484fe0 <error_fatal>)
    at ../../softmmu/qdev-monitor.c:673
#26 0x000002aa168b2086 in device_init_func (opaque=<optimized out>,
opts=<optimized out>, errp=0x2aa17484fe0 <error_fatal>)
    at ../../softmmu/vl.c:1187
#27 0x000002aa16b2b906 in qemu_opts_foreach (list=<optimized out>,
func=func@entry=0x2aa168b2068 <device_init_func>,
opaque=opaque@entry=0x0, errp=0x2aa17484fe0 <error_fatal>) at
../../util/qemu-option.c:1135
#28 0x000002aa168b5046 in qemu_create_cli_devices () at ../../softmmu/vl.c:2645
#29 0x000002aa168b5046 in qmp_x_exit_preconfig (errp=<optimized out>)
at ../../softmmu/vl.c:2693
#30 0x000002aa168b83ce in qemu_init (argc=<optimized out>,
argv=<optimized out>, envp=<optimized out>) at ../../softmmu/vl.c:3713
#31 0x000002aa1654f6da in main (argc=<optimized out>, argv=<optimized
out>, envp=<optimized out>) at ../../softmmu/main.c:49


Backtrace, qos-test:

Thread 2 (Thread 0x3ff9de7f910 (LWP 59555)):
#0  syscall () at ../sysdeps/unix/sysv/linux/s390/s390-64/syscall.S:53
#1  0x000002aa20c36a54 in qemu_futex_wait (val=<optimized out>,
f=<optimized out>) at /home/ubuntu/qemu/include/qemu/futex.h:29
#2  qemu_event_wait (ev=ev@entry=0x2aa20cdc3a0 <rcu_call_ready_event>)
at ../../util/qemu-thread-posix.c:480
#3  0x000002aa20c4efa4 in call_rcu_thread (opaque=opaque@entry=0x0) at
../../util/rcu.c:258
#4  0x000002aa20c35a6a in qemu_thread_start (args=<optimized out>) at
../../util/qemu-thread-posix.c:541
#5  0x000003ff9e887aa8 in start_thread (arg=0x3ff9de7f910) at
pthread_create.c:463
#6  0x000003ff9e77a11e in thread_start () at
../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65

Thread 1 (Thread 0x3ff9eef0940 (LWP 59553)):
#0  0x000003ff9e89165a in __libc_read (fd=<optimized out>,
buf=buf@entry=0x3ffc10fd3e0, nbytes=nbytes@entry=1024)
    at ../sysdeps/unix/sysv/linux/read.c:27
#1  0x000002aa20bdee7a in read (__nbytes=1024, __buf=0x3ffc10fd3e0,
__fd=<optimized out>)
    at /usr/include/s390x-linux-gnu/bits/unistd.h:44
#2  qtest_client_socket_recv_line (s=0x2aa22379fc0) at
../../tests/qtest/libqtest.c:494
#3  0x000002aa20bdf032 in qtest_rsp_args (s=s@entry=0x2aa22379fc0,
expected_args=expected_args@entry=1)
    at ../../tests/qtest/libqtest.c:521
#4  0x000002aa20bdf6f2 in qtest_query_target_endianness
(s=0x2aa22379fc0) at ../../tests/qtest/libqtest.c:570
#5  0x000002aa20be0312 in qtest_init_without_qmp_handshake
(extra_args=<optimized out>) at ../../tests/qtest/libqtest.c:332
#6  0x000002aa20be09e2 in qtest_init (extra_args=<optimized out>) at
../../tests/qtest/libqtest.c:339
#7  0x000002aa20bc7b76 in qtest_start (
    args=0x2aa22389ef0 "-M pc  -device
vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
memory-backend-memfd,id=mem,size=256M,share=on  -M memory-backend=mem
-m 256M -chardev socket,id=char1,path=/tmp/qtest-59553-so"...)
    at ../../tests/qtest/libqtest-single.h:29
#8  restart_qemu_or_continue (
    path=0x2aa22389ef0 "-M pc  -device
vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
memory-backend-memfd,id=mem,size=256M,share=on  -M memory-backend=mem
-m 256M -chardev socket,id=char1,path=/tmp/qtest-59553-so"...) at
../../tests/qtest/qos-test.c:105
#9  run_one_test (arg=<optimized out>) at ../../tests/qtest/qos-test.c:178
#10 0x000003ff9eb7c70c in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#11 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#12 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#13 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#14 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#15 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#16 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#17 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#18 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#19 0x000003ff9eb7c8ee in g_test_run_suite () from
/usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#20 0x000003ff9eb7c928 in g_test_run () from
/usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#21 0x000002aa20bc6c9a in main (argc=<optimized out>, argv=<optimized
out>, envp=<optimized out>) at ../../tests/qtest/qos-test.c:338

Backtrace, qemu-storage-daemon:

Thread 2 (Thread 0x3ff951b7910 (LWP 60132)):
#0  syscall () at ../sysdeps/unix/sysv/linux/s390/s390-64/syscall.S:53
#1  0x000002aa0184fa6c in qemu_futex_wait (val=<optimized out>,
f=<optimized out>) at /home/ubuntu/qemu/include/qemu/futex.h:29
#2  qemu_event_wait (ev=ev@entry=0x2aa01971158 <rcu_call_ready_event>)
at ../../util/qemu-thread-posix.c:480
#3  0x000002aa0187498c in call_rcu_thread (opaque=opaque@entry=0x0) at
../../util/rcu.c:258
#4  0x000002aa0184ea82 in qemu_thread_start (args=<optimized out>) at
../../util/qemu-thread-posix.c:541
#5  0x000003ffa0507aa8 in start_thread (arg=0x3ff951b7910) at
pthread_create.c:463
#6  0x000003ffa03fa11e in thread_start () at
../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65

Thread 1 (Thread 0x3ffa14590e0 (LWP 60122)):
#0  0x000003ffa03ef302 in __GI_ppoll (fds=0x2aa0332b3d0, nfds=5,
timeout=<optimized out>, timeout@entry=0x0,
    sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:39
#1  0x000002aa0185b054 in ppoll (__ss=0x0, __timeout=0x0,
__nfds=<optimized out>, __fds=<optimized out>)
    at /usr/include/s390x-linux-gnu/bits/poll2.h:77
#2  qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>,
timeout=timeout@entry=-1) at ../../util/qemu-timer.c:336
#3  0x000002aa01851e48 in os_host_main_loop_wait (timeout=-1) at
../../util/main-loop.c:250
#4  main_loop_wait (nonblocking=<optimized out>) at ../../util/main-loop.c:531
#5  0x000002aa01738dda in main (argc=<optimized out>,
argv=0x3ffd28feba8) at ../../storage-daemon/qemu-storage-daemon.c:345


thanks
-- PMM


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: intermittent failure, s390 host, x86-64 guest, vhost-user-blk test
  2021-08-04 12:40 intermittent failure, s390 host, x86-64 guest, vhost-user-blk test Peter Maydell
@ 2021-08-04 13:43 ` Michael S. Tsirkin
  2021-08-04 14:33   ` Peter Maydell
  2021-08-27 16:53   ` Raphael Norwitz
  0 siblings, 2 replies; 6+ messages in thread
From: Michael S. Tsirkin @ 2021-08-04 13:43 UTC (permalink / raw)
  To: Peter Maydell
  Cc: Kevin Wolf, QEMU Developers, Raphael Norwitz, Stefan Hajnoczi,
	Paolo Bonzini, Max Reitz, Eric Blake, Stefano Garzarella

On Wed, Aug 04, 2021 at 01:40:37PM +0100, Peter Maydell wrote:
> Saw this intermittent as part of my ad-hoc CI test setup.
> The backtrace says that the QEMU process has somehow hung
> during 'realize' of the vhost-user device...

Hmm seems to have to do with the daemon not responding ...
Cc a bunch more people ...

> Process tree:
> 
> ubuntu   59552  0.0  0.0   5460  3120 ?        S    06:51   0:00
>            \_ bash -o pipefail -c echo
> 'MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
> QTEST_QEMU_IMG=./qemu-img
> G_TEST_DBUS_DAEMON=/home/ubuntu/qemu/tests/dbus-vmstate-daemon.sh
> QTEST_QEMU_BINARY=./qemu-system-x86_64
> QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
> tests/qtest/qos-test --tap -k' &&
> MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
> QTEST_QEMU_IMG=./qemu-img
> G_TEST_DBUS_DAEMON=/home/ubuntu/qemu/tests/dbus-vmstate-daemon.sh
> QTEST_QEMU_BINARY=./qemu-system-x86_64
> QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
> tests/qtest/qos-test --tap -k -m quick < /dev/null |
> ./scripts/tap-driver.pl --test-name="qtest-x86_64/qos-test"
> ubuntu   59553  0.0  0.2  88008  8332 ?        Sl   06:51   0:00
>                \_ tests/qtest/qos-test --tap -k -m quick
> ubuntu   60122  0.0  0.5 254432 21516 ?        Sl   06:51   0:00
>                |   \_ ./storage-daemon/qemu-storage-daemon --blockdev
> driver=file,node-name=disk0,filename=qtest.trJnuR --export
> type=vhost-user-blk,id=disk0,addr.type=unix,addr.path=/tmp/qtest-59553-sock.iRt6WL,node-name=disk0,writable=on,num-queues=1
> ubuntu   60123  0.0  1.6 747952 62732 ?        Sl   06:51   0:00
>                |   \_ ./qemu-system-x86_64 -qtest
> unix:/tmp/qtest-59553.sock -qtest-log /dev/null -chardev
> socket,path=/tmp/qtest-59553.qmp,id=char0 -mon
> chardev=char0,mode=control -display none -M pc -device
> vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
> memory-backend-memfd,id=mem,size=256M,share=on -M memory-backend=mem
> -m 256M -chardev socket,id=char1,path=/tmp/qtest-59553-sock.iRt6WL
> -accel qtest
> ubuntu   59554  0.0  0.2  14116 11204 ?        S    06:51   0:00
>                \_ perl ./scripts/tap-driver.pl
> --test-name=qtest-x86_64/qos-test
> 
> Backtrace, qemu process:
> 
> Thread 4 (Thread 0x3ff5be1d910 (LWP 60131)):
> #0  0x000003ff7e2bed6a in __GI___sigtimedwait (set=<optimized out>,
>     set@entry=0x3ff5be1a098, info=info@entry=0x3ff5be19f68,
> timeout=timeout@entry=0x0)
>     at ../sysdeps/unix/sysv/linux/sigtimedwait.c:42
> #1  0x000003ff7e492ca8 in __sigwait (set=set@entry=0x3ff5be1a098,
> sig=sig@entry=0x3ff5be1a094)
>     at ../sysdeps/unix/sysv/linux/sigwait.c:28
> #2  0x000002aa16891ab2 in dummy_cpu_thread_fn
> (arg=arg@entry=0x2aa182103e0) at ../../accel/dummy-cpus.c:46
> #3  0x000002aa16b22f5a in qemu_thread_start (args=<optimized out>) at
> ../../util/qemu-thread-posix.c:541
> #4  0x000003ff7e487aa8 in start_thread (arg=0x3ff5be1d910) at
> pthread_create.c:463
> #5  0x000003ff7e37a11e in thread_start () at
> ../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65
> 
> Thread 3 (Thread 0x3ff608b4910 (LWP 60130)):
> #0  0x000003ff7e36f1b0 in __GI___poll (fds=0x3ff540031f0, nfds=3,
> timeout=<optimized out>) at ../sysdeps/unix/sysv/linux/poll.c:29
> #1  0x000003ff7f9d2624 in  () at /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #2  0x000003ff7f9d2aa8 in g_main_loop_run () at
> /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #3  0x000002aa1695ad26 in iothread_run
> (opaque=opaque@entry=0x2aa17ea2b80) at ../../iothread.c:73
> #4  0x000002aa16b22f5a in qemu_thread_start (args=<optimized out>) at
> ../../util/qemu-thread-posix.c:541
> #5  0x000003ff7e487aa8 in start_thread (arg=0x3ff608b4910) at
> pthread_create.c:463
> #6  0x000003ff7e37a11e in thread_start () at
> ../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65
> 
> Thread 2 (Thread 0x3ff711b7910 (LWP 60129)):
> #0  0x000003ff7e4911f6 in __lll_lock_wait
> (futex=futex@entry=0x2aa17466bb8 <qemu_global_mutex>,
> private=<optimized out>)
>     at lowlevellock.c:46
> #1  0x000003ff7e48a204 in __GI___pthread_mutex_lock
> (mutex=mutex@entry=0x2aa17466bb8 <qemu_global_mutex>)
>     at ../nptl/pthread_mutex_lock.c:80
> #2  0x000002aa16b23116 in qemu_mutex_lock_impl (mutex=0x2aa17466bb8
> <qemu_global_mutex>, file=0x2aa170a07d8 "../../util/rcu.c",
> line=<optimized out>) at ../../util/qemu-thread-posix.c:79
> #3  0x000002aa168221c0 in qemu_mutex_lock_iothread_impl
> (file=file@entry=0x2aa170a07d8 "../../util/rcu.c",
> line=line@entry=266)
>     at ../../softmmu/cpus.c:493
> #4  0x000002aa16b4c8aa in call_rcu_thread (opaque=opaque@entry=0x0) at
> ../../util/rcu.c:266
> #5  0x000002aa16b22f5a in qemu_thread_start (args=<optimized out>) at
> ../../util/qemu-thread-posix.c:541
> #6  0x000003ff7e487aa8 in start_thread (arg=0x3ff711b7910) at
> pthread_create.c:463
> #7  0x000003ff7e37a11e in thread_start () at
> ../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65
> 
> Thread 1 (Thread 0x3ff81035470 (LWP 60123)):
> #0  0x000003ff7e36f1b0 in __GI___poll (fds=0x2aa18147da0, nfds=2,
> timeout=<optimized out>) at ../sysdeps/unix/sysv/linux/poll.c:29
> #1  0x000003ff7f9d2624 in  () at /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #2  0x000003ff7f9d2aa8 in g_main_loop_run () at
> /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #3  0x000002aa169104de in vhost_user_read
> (dev=dev@entry=0x2aa18f7afc8, msg=msg@entry=0x3ffc167ddfc)
>     at ../../hw/virtio/vhost-user.c:402
> #4  0x000002aa1691186e in vhost_user_get_u64
> (dev=dev@entry=0x2aa18f7afc8, request=request@entry=1,
> u64=u64@entry=0x3ffc167e138)
>     at ../../hw/virtio/vhost-user.c:1334
> #5  0x000002aa16913388 in vhost_user_get_features
> (features=0x3ffc167e138, dev=0x2aa18f7afc8) at
> ../../hw/virtio/vhost-user.c:1356
> #6  0x000002aa16913388 in vhost_user_backend_init (dev=0x2aa18f7afc8,
> opaque=0x2aa18f7b240, errp=0x3ffc167e470)
>     at ../../hw/virtio/vhost-user.c:1877
> #7  0x000002aa1684030c in vhost_dev_init
> (hdev=hdev@entry=0x2aa18f7afc8, opaque=opaque@entry=0x2aa18f7b240,
> backend_type=backend_type@entry=VHOST_BACKEND_TYPE_USER,
> busyloop_timeout=busyloop_timeout@entry=0,
> errp=errp@entry=0x3ffc167e470)
>     at ../../hw/virtio/vhost.c:1302
> #8  0x000002aa168be742 in vhost_user_blk_connect
> (dev=dev@entry=0x2aa18f7ad78, errp=errp@entry=0x3ffc167e470)
>     at ../../hw/block/vhost-user-blk.c:339
> #9  0x000002aa168bec44 in vhost_user_blk_realize_connect
> (errp=0x3ffc167e470, s=0x2aa18f7ad78) at
> ../../hw/block/vhost-user-blk.c:440
> #10 0x000002aa168bec44 in vhost_user_blk_device_realize
> (dev=0x2aa18f7ad78, errp=0x3ffc167e470)
>     at ../../hw/block/vhost-user-blk.c:513
> #11 0x000002aa16819e78 in virtio_device_realize (dev=0x2aa18f7ad78,
> errp=0x3ffc167e530) at ../../hw/virtio/virtio.c:3659
> #12 0x000002aa169fcc50 in device_set_realized (obj=<optimized out>,
> value=<optimized out>, errp=0x3ffc167e690)
>     at ../../hw/core/qdev.c:761
> #13 0x000002aa169229ba in property_set_bool
> (obj=obj@entry=0x2aa18f7ad78, v=v@entry=
>     0x2aa18fc41e0, name=name@entry=0x2aa16ccfd74 "realized",
> opaque=0x2aa17f2a7a0, errp=errp@entry=0x3ffc167e690)
>     at ../../qom/object.c:2258
> #14 0x000002aa16925534 in object_property_set
> (obj=obj@entry=0x2aa18f7ad78, name=name@entry=0x2aa16ccfd74
> "realized", v=v@entry=
>     0x2aa18fc41e0, errp=errp@entry=0x3ffc167e980) at ../../qom/object.c:1403
> #15 0x000002aa1692229c in object_property_set_qobject
> (obj=obj@entry=0x2aa18f7ad78, name=name@entry=0x2aa16ccfd74
> "realized", value=value@entry=0x2aa18fc4120,
> errp=errp@entry=0x3ffc167e980) at ../../qom/qom-qobject.c:28
> #16 0x000002aa169257e2 in object_property_set_bool (obj=0x2aa18f7ad78,
> name=0x2aa16ccfd74 "realized", value=<optimized out>,
> errp=0x3ffc167e980) at ../../qom/object.c:1473
> #17 0x000002aa1677e9c2 in virtio_pci_realize (pci_dev=0x2aa18f72b80,
> errp=0x3ffc167e980) at ../../hw/virtio/virtio-pci.c:1907
> #18 0x000002aa165dfc48 in pci_qdev_realize (qdev=0x2aa18f72b80,
> errp=<optimized out>) at ../../hw/pci/pci.c:2148
> #19 0x000002aa169fcc50 in device_set_realized (obj=<optimized out>,
> value=<optimized out>, errp=0x3ffc167ebe8)
>     at ../../hw/core/qdev.c:761
> #20 0x000002aa169229ba in property_set_bool
> (obj=obj@entry=0x2aa18f72b80, v=v@entry=
>     0x2aa18fbe450, name=name@entry=0x2aa16ccfd74 "realized",
> opaque=0x2aa17f2a7a0, errp=errp@entry=0x3ffc167ebe8)
>     at ../../qom/object.c:2258
> #21 0x000002aa16925534 in object_property_set
> (obj=obj@entry=0x2aa18f72b80, name=name@entry=0x2aa16ccfd74
> "realized", v=v@entry=
>     0x2aa18fbe450, errp=errp@entry=0x2aa17484fe0 <error_fatal>) at
> ../../qom/object.c:1403
> #22 0x000002aa1692229c in object_property_set_qobject
> (obj=obj@entry=0x2aa18f72b80, name=name@entry=0x2aa16ccfd74
> "realized", value=value@entry=0x2aa18fbe580,
> errp=errp@entry=0x2aa17484fe0 <error_fatal>) at
> ../../qom/qom-qobject.c:28
> #23 0x000002aa169257e2 in object_property_set_bool (obj=0x2aa18f72b80,
> name=name@entry=0x2aa16ccfd74 "realized", value=value@entry=true,
> errp=0x2aa17484fe0 <error_fatal>, errp@entry=<error reading variable:
> value has been optimized out>) at ../../qom/object.c:1473
> #24 0x000002aa169fb87e in qdev_realize (dev=<optimized out>,
> bus=bus@entry=0x2aa18384200, errp=<error reading variable: value has
> been optimized out>) at ../../hw/core/qdev.c:389
> #25 0x000002aa165f1c6c in qdev_device_add (opts=<optimized out>,
> errp=<optimized out>, errp@entry=0x2aa17484fe0 <error_fatal>)
>     at ../../softmmu/qdev-monitor.c:673
> #26 0x000002aa168b2086 in device_init_func (opaque=<optimized out>,
> opts=<optimized out>, errp=0x2aa17484fe0 <error_fatal>)
>     at ../../softmmu/vl.c:1187
> #27 0x000002aa16b2b906 in qemu_opts_foreach (list=<optimized out>,
> func=func@entry=0x2aa168b2068 <device_init_func>,
> opaque=opaque@entry=0x0, errp=0x2aa17484fe0 <error_fatal>) at
> ../../util/qemu-option.c:1135
> #28 0x000002aa168b5046 in qemu_create_cli_devices () at ../../softmmu/vl.c:2645
> #29 0x000002aa168b5046 in qmp_x_exit_preconfig (errp=<optimized out>)
> at ../../softmmu/vl.c:2693
> #30 0x000002aa168b83ce in qemu_init (argc=<optimized out>,
> argv=<optimized out>, envp=<optimized out>) at ../../softmmu/vl.c:3713
> #31 0x000002aa1654f6da in main (argc=<optimized out>, argv=<optimized
> out>, envp=<optimized out>) at ../../softmmu/main.c:49
> 
> 
> Backtrace, qos-test:
> 
> Thread 2 (Thread 0x3ff9de7f910 (LWP 59555)):
> #0  syscall () at ../sysdeps/unix/sysv/linux/s390/s390-64/syscall.S:53
> #1  0x000002aa20c36a54 in qemu_futex_wait (val=<optimized out>,
> f=<optimized out>) at /home/ubuntu/qemu/include/qemu/futex.h:29
> #2  qemu_event_wait (ev=ev@entry=0x2aa20cdc3a0 <rcu_call_ready_event>)
> at ../../util/qemu-thread-posix.c:480
> #3  0x000002aa20c4efa4 in call_rcu_thread (opaque=opaque@entry=0x0) at
> ../../util/rcu.c:258
> #4  0x000002aa20c35a6a in qemu_thread_start (args=<optimized out>) at
> ../../util/qemu-thread-posix.c:541
> #5  0x000003ff9e887aa8 in start_thread (arg=0x3ff9de7f910) at
> pthread_create.c:463
> #6  0x000003ff9e77a11e in thread_start () at
> ../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65
> 
> Thread 1 (Thread 0x3ff9eef0940 (LWP 59553)):
> #0  0x000003ff9e89165a in __libc_read (fd=<optimized out>,
> buf=buf@entry=0x3ffc10fd3e0, nbytes=nbytes@entry=1024)
>     at ../sysdeps/unix/sysv/linux/read.c:27
> #1  0x000002aa20bdee7a in read (__nbytes=1024, __buf=0x3ffc10fd3e0,
> __fd=<optimized out>)
>     at /usr/include/s390x-linux-gnu/bits/unistd.h:44
> #2  qtest_client_socket_recv_line (s=0x2aa22379fc0) at
> ../../tests/qtest/libqtest.c:494
> #3  0x000002aa20bdf032 in qtest_rsp_args (s=s@entry=0x2aa22379fc0,
> expected_args=expected_args@entry=1)
>     at ../../tests/qtest/libqtest.c:521
> #4  0x000002aa20bdf6f2 in qtest_query_target_endianness
> (s=0x2aa22379fc0) at ../../tests/qtest/libqtest.c:570
> #5  0x000002aa20be0312 in qtest_init_without_qmp_handshake
> (extra_args=<optimized out>) at ../../tests/qtest/libqtest.c:332
> #6  0x000002aa20be09e2 in qtest_init (extra_args=<optimized out>) at
> ../../tests/qtest/libqtest.c:339
> #7  0x000002aa20bc7b76 in qtest_start (
>     args=0x2aa22389ef0 "-M pc  -device
> vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
> memory-backend-memfd,id=mem,size=256M,share=on  -M memory-backend=mem
> -m 256M -chardev socket,id=char1,path=/tmp/qtest-59553-so"...)
>     at ../../tests/qtest/libqtest-single.h:29
> #8  restart_qemu_or_continue (
>     path=0x2aa22389ef0 "-M pc  -device
> vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
> memory-backend-memfd,id=mem,size=256M,share=on  -M memory-backend=mem
> -m 256M -chardev socket,id=char1,path=/tmp/qtest-59553-so"...) at
> ../../tests/qtest/qos-test.c:105
> #9  run_one_test (arg=<optimized out>) at ../../tests/qtest/qos-test.c:178
> #10 0x000003ff9eb7c70c in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #11 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #12 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #13 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #14 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #15 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #16 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #17 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #18 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #19 0x000003ff9eb7c8ee in g_test_run_suite () from
> /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #20 0x000003ff9eb7c928 in g_test_run () from
> /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> #21 0x000002aa20bc6c9a in main (argc=<optimized out>, argv=<optimized
> out>, envp=<optimized out>) at ../../tests/qtest/qos-test.c:338
> 
> Backtrace, qemu-storage-daemon:
> 
> Thread 2 (Thread 0x3ff951b7910 (LWP 60132)):
> #0  syscall () at ../sysdeps/unix/sysv/linux/s390/s390-64/syscall.S:53
> #1  0x000002aa0184fa6c in qemu_futex_wait (val=<optimized out>,
> f=<optimized out>) at /home/ubuntu/qemu/include/qemu/futex.h:29
> #2  qemu_event_wait (ev=ev@entry=0x2aa01971158 <rcu_call_ready_event>)
> at ../../util/qemu-thread-posix.c:480
> #3  0x000002aa0187498c in call_rcu_thread (opaque=opaque@entry=0x0) at
> ../../util/rcu.c:258
> #4  0x000002aa0184ea82 in qemu_thread_start (args=<optimized out>) at
> ../../util/qemu-thread-posix.c:541
> #5  0x000003ffa0507aa8 in start_thread (arg=0x3ff951b7910) at
> pthread_create.c:463
> #6  0x000003ffa03fa11e in thread_start () at
> ../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65
> 
> Thread 1 (Thread 0x3ffa14590e0 (LWP 60122)):
> #0  0x000003ffa03ef302 in __GI_ppoll (fds=0x2aa0332b3d0, nfds=5,
> timeout=<optimized out>, timeout@entry=0x0,
>     sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:39
> #1  0x000002aa0185b054 in ppoll (__ss=0x0, __timeout=0x0,
> __nfds=<optimized out>, __fds=<optimized out>)
>     at /usr/include/s390x-linux-gnu/bits/poll2.h:77
> #2  qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>,
> timeout=timeout@entry=-1) at ../../util/qemu-timer.c:336
> #3  0x000002aa01851e48 in os_host_main_loop_wait (timeout=-1) at
> ../../util/main-loop.c:250
> #4  main_loop_wait (nonblocking=<optimized out>) at ../../util/main-loop.c:531
> #5  0x000002aa01738dda in main (argc=<optimized out>,
> argv=0x3ffd28feba8) at ../../storage-daemon/qemu-storage-daemon.c:345
> 
> 
> thanks
> -- PMM



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: intermittent failure, s390 host, x86-64 guest, vhost-user-blk test
  2021-08-04 13:43 ` Michael S. Tsirkin
@ 2021-08-04 14:33   ` Peter Maydell
  2021-08-05 17:39     ` Peter Maydell
  2021-08-27 16:53   ` Raphael Norwitz
  1 sibling, 1 reply; 6+ messages in thread
From: Peter Maydell @ 2021-08-04 14:33 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Kevin Wolf, QEMU Developers, Raphael Norwitz, Stefan Hajnoczi,
	Paolo Bonzini, Max Reitz, Eric Blake, Stefano Garzarella

On Wed, 4 Aug 2021 at 14:43, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Aug 04, 2021 at 01:40:37PM +0100, Peter Maydell wrote:
> > Saw this intermittent as part of my ad-hoc CI test setup.
> > The backtrace says that the QEMU process has somehow hung
> > during 'realize' of the vhost-user device...
>
> Hmm seems to have to do with the daemon not responding ...
> Cc a bunch more people ...

Here's another one; same thing, but a ppc64 guest, I think.

Process tree:
qos-test(34767)-+-qemu-storage-da(35116)
                `-qemu-system-ppc(35117)
===========================================================
PROCESS: 34767
ubuntu   34767 34766  0 09:03 ?        00:00:00 tests/qtest/qos-test
--tap -k -m quick
[New LWP 34769]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/s390x-linux-gnu/libthread_db.so.1".
0x000003ff9b51165a in __libc_read (fd=<optimized out>,
buf=buf@entry=0x3ffcd77e180, nbytes=nbytes@entry=1024) at
../sysdeps/unix/sysv/linux/read.c:27
27      ../sysdeps/unix/sysv/linux/read.c: No such file or directory.

Thread 2 (Thread 0x3ff9aaff910 (LWP 34769)):
#0  syscall () at ../sysdeps/unix/sysv/linux/s390/s390-64/syscall.S:53
#1  0x000002aa06736a54 in qemu_futex_wait (val=<optimized out>,
f=<optimized out>) at /home/ubuntu/qemu/include/qemu/futex.h:29
#2  qemu_event_wait (ev=ev@entry=0x2aa067dc3a0 <rcu_call_ready_event>)
at ../../util/qemu-thread-posix.c:480
#3  0x000002aa0674efa4 in call_rcu_thread (opaque=opaque@entry=0x0) at
../../util/rcu.c:258
#4  0x000002aa06735a6a in qemu_thread_start (args=<optimized out>) at
../../util/qemu-thread-posix.c:541
#5  0x000003ff9b507aa8 in start_thread (arg=0x3ff9aaff910) at
pthread_create.c:463
#6  0x000003ff9b3fa11e in thread_start () at
../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65

Thread 1 (Thread 0x3ff9bb70940 (LWP 34767)):
#0  0x000003ff9b51165a in __libc_read (fd=<optimized out>,
buf=buf@entry=0x3ffcd77e180, nbytes=nbytes@entry=1024) at
../sysdeps/unix/sysv/linux/read.c:27
#1  0x000002aa066dee7a in read (__nbytes=1024, __buf=0x3ffcd77e180,
__fd=<optimized out>) at /usr/include/s390x-linux-gnu/bits/unistd.h:44
#2  qtest_client_socket_recv_line (s=0x2aa0775f900) at
../../tests/qtest/libqtest.c:494
#3  0x000002aa066df032 in qtest_rsp_args (s=s@entry=0x2aa0775f900,
expected_args=expected_args@entry=1) at
../../tests/qtest/libqtest.c:521
#4  0x000002aa066df6f2 in qtest_query_target_endianness
(s=0x2aa0775f900) at ../../tests/qtest/libqtest.c:570
#5  0x000002aa066e0312 in qtest_init_without_qmp_handshake
(extra_args=<optimized out>) at ../../tests/qtest/libqtest.c:332
#6  0x000002aa066e09e2 in qtest_init (extra_args=<optimized out>) at
../../tests/qtest/libqtest.c:339
#7  0x000002aa066c7b76 in qtest_start (args=0x2aa07769df0 "-M pseries
-device vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
memory-backend-memfd,id=mem,size=256M,share=on  -M memory-backend=mem
-m 256M -chardev socket,id=char1,path=/tmp/qtest-347"...) at
../../tests/qtest/libqtest-single.h:29
#8  restart_qemu_or_continue (path=0x2aa07769df0 "-M pseries  -device
vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
memory-backend-memfd,id=mem,size=256M,share=on  -M memory-backend=mem
-m 256M -chardev socket,id=char1,path=/tmp/qtest-347"...) at
../../tests/qtest/qos-test.c:105
#9  run_one_test (arg=<optimized out>) at ../../tests/qtest/qos-test.c:178
#10 0x000003ff9b7fc70c in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#11 0x000003ff9b7fc648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#12 0x000003ff9b7fc648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#13 0x000003ff9b7fc648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#14 0x000003ff9b7fc648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#15 0x000003ff9b7fc648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#16 0x000003ff9b7fc648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#17 0x000003ff9b7fc648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#18 0x000003ff9b7fc8ee in g_test_run_suite () from
/usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#19 0x000003ff9b7fc928 in g_test_run () from
/usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#20 0x000002aa066c6c9a in main (argc=<optimized out>, argv=<optimized
out>, envp=<optimized out>) at ../../tests/qtest/qos-test.c:338

===========================================================
PROCESS: 35116
ubuntu   35116 34767  0 09:03 ?        00:00:00
./storage-daemon/qemu-storage-daemon --blockdev
driver=file,node-name=disk0,filename=qtest.7TNoNG --export
type=vhost-user-blk,id=disk0,addr.type=unix,addr.path=/tmp/qtest-34767-sock.OysARz,node-name=disk0,writable=on,num-queues=8
--blockdev driver=file,node-name=disk1,filename=qtest.jZXeFU --export
type=vhost-user-blk,id=disk1,addr.type=unix,addr.path=/tmp/qtest-34767-sock.2aniJN,node-name=disk1,writable=on,num-queues=8
[New LWP 35124]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/s390x-linux-gnu/libthread_db.so.1".
0x000003ff8b66f302 in __GI_ppoll (fds=0x2aa365db4b0, nfds=6,
timeout=<optimized out>, timeout@entry=0x0, sigmask=sigmask@entry=0x0)
at ../sysdeps/unix/sysv/linux/ppoll.c:39
39      ../sysdeps/unix/sysv/linux/ppoll.c: No such file or directory.

Thread 2 (Thread 0x3ff80437910 (LWP 35124)):
#0  syscall () at ../sysdeps/unix/sysv/linux/s390/s390-64/syscall.S:53
#1  0x000002aa356cfa6c in qemu_futex_wait (val=<optimized out>,
f=<optimized out>) at /home/ubuntu/qemu/include/qemu/futex.h:29
#2  qemu_event_wait (ev=ev@entry=0x2aa357f1158 <rcu_call_ready_event>)
at ../../util/qemu-thread-posix.c:480
#3  0x000002aa356f498c in call_rcu_thread (opaque=opaque@entry=0x0) at
../../util/rcu.c:258
#4  0x000002aa356cea82 in qemu_thread_start (args=<optimized out>) at
../../util/qemu-thread-posix.c:541
#5  0x000003ff8b787aa8 in start_thread (arg=0x3ff80437910) at
pthread_create.c:463
#6  0x000003ff8b67a11e in thread_start () at
../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65

Thread 1 (Thread 0x3ff8c6d90e0 (LWP 35116)):
#0  0x000003ff8b66f302 in __GI_ppoll (fds=0x2aa365db4b0, nfds=6,
timeout=<optimized out>, timeout@entry=0x0, sigmask=sigmask@entry=0x0)
at ../sysdeps/unix/sysv/linux/ppoll.c:39
#1  0x000002aa356db054 in ppoll (__ss=0x0, __timeout=0x0,
__nfds=<optimized out>, __fds=<optimized out>) at
/usr/include/s390x-linux-gnu/bits/poll2.h:77
#2  qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>,
timeout=timeout@entry=-1) at ../../util/qemu-timer.c:336
#3  0x000002aa356d1e48 in os_host_main_loop_wait (timeout=-1) at
../../util/main-loop.c:250
#4  main_loop_wait (nonblocking=<optimized out>) at ../../util/main-loop.c:531
#5  0x000002aa355b8dda in main (argc=<optimized out>,
argv=0x3ffe35ff3a8) at ../../storage-daemon/qemu-storage-daemon.c:345

===========================================================
PROCESS: 35117
ubuntu   35117 34767  0 09:03 ?        00:00:00 ./qemu-system-ppc64
-qtest unix:/tmp/qtest-34767.sock -qtest-log /dev/null -chardev
socket,path=/tmp/qtest-34767.qmp,id=char0 -mon
chardev=char0,mode=control -display none -M pseries -device
vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
memory-backend-memfd,id=mem,size=256M,share=on -M memory-backend=mem
-m 256M -chardev socket,id=char1,path=/tmp/qtest-34767-sock.OysARz
-chardev socket,id=char2,path=/tmp/qtest-34767-sock.2aniJN -accel
qtest
[New LWP 35121]
[New LWP 35122]
[New LWP 35123]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/s390x-linux-gnu/libthread_db.so.1".
0x000003ffadc6f1b0 in __GI___poll (fds=0x2aa30cf1e70, nfds=2,
timeout=<optimized out>) at ../sysdeps/unix/sysv/linux/poll.c:29
29      ../sysdeps/unix/sysv/linux/poll.c: No such file or directory.

Thread 4 (Thread 0x3ff8f8c2910 (LWP 35123)):
#0  0x000003ffadd911f6 in __lll_lock_wait
(futex=futex@entry=0x2aa2fd3b5e0 <qemu_global_mutex>,
private=<optimized out>) at lowlevellock.c:46
#1  0x000003ffadd8a204 in __GI___pthread_mutex_lock
(mutex=mutex@entry=0x2aa2fd3b5e0 <qemu_global_mutex>) at
../nptl/pthread_mutex_lock.c:80
#2  0x000002aa2f380c36 in qemu_mutex_lock_impl (mutex=0x2aa2fd3b5e0
<qemu_global_mutex>, file=0x2aa2f4f6b98 "../../accel/dummy-cpus.c",
line=<optimized out>) at ../../util/qemu-thread-posix.c:79
#3  0x000002aa2f06dde0 in qemu_mutex_lock_iothread_impl
(file=file@entry=0x2aa2f4f6b98 "../../accel/dummy-cpus.c",
line=line@entry=52) at ../../softmmu/cpus.c:493
#4  0x000002aa2f0e706c in dummy_cpu_thread_fn
(arg=arg@entry=0x2aa310a4c00) at ../../accel/dummy-cpus.c:52
#5  0x000002aa2f380a7a in qemu_thread_start (args=<optimized out>) at
../../util/qemu-thread-posix.c:541
#6  0x000003ffadd87aa8 in start_thread (arg=0x3ff8f8c2910) at
pthread_create.c:463
#7  0x000003ffadc7a11e in thread_start () at
../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65

Thread 3 (Thread 0x3ff901b4910 (LWP 35122)):
#0  0x000003ffadc6f1b0 in __GI___poll (fds=0x3ff800031f0, nfds=3,
timeout=<optimized out>) at ../sysdeps/unix/sysv/linux/poll.c:29
#1  0x000003ffaf2d2624 in  () at /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#2  0x000003ffaf2d2aa8 in g_main_loop_run () at
/usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#3  0x000002aa2f1b4aa6 in iothread_run
(opaque=opaque@entry=0x2aa3093cb80) at ../../iothread.c:73
#4  0x000002aa2f380a7a in qemu_thread_start (args=<optimized out>) at
../../util/qemu-thread-posix.c:541
#5  0x000003ffadd87aa8 in start_thread (arg=0x3ff901b4910) at
pthread_create.c:463
#6  0x000003ffadc7a11e in thread_start () at
../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65

Thread 2 (Thread 0x3ffa0ab7910 (LWP 35121)):
#0  0x000003ffadd911f6 in __lll_lock_wait
(futex=futex@entry=0x2aa2fd3b5e0 <qemu_global_mutex>,
private=<optimized out>) at lowlevellock.c:46
#1  0x000003ffadd8a204 in __GI___pthread_mutex_lock
(mutex=mutex@entry=0x2aa2fd3b5e0 <qemu_global_mutex>) at
../nptl/pthread_mutex_lock.c:80
#2  0x000002aa2f380c36 in qemu_mutex_lock_impl (mutex=0x2aa2fd3b5e0
<qemu_global_mutex>, file=0x2aa2f919570 "../../util/rcu.c",
line=<optimized out>) at ../../util/qemu-thread-posix.c:79
#3  0x000002aa2f06dde0 in qemu_mutex_lock_iothread_impl
(file=file@entry=0x2aa2f919570 "../../util/rcu.c",
line=line@entry=266) at ../../softmmu/cpus.c:493
#4  0x000002aa2f3aa3ca in call_rcu_thread (opaque=opaque@entry=0x0) at
../../util/rcu.c:266
#5  0x000002aa2f380a7a in qemu_thread_start (args=<optimized out>) at
../../util/qemu-thread-posix.c:541
#6  0x000003ffadd87aa8 in start_thread (arg=0x3ffa0ab7910) at
pthread_create.c:463
#7  0x000003ffadc7a11e in thread_start () at
../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65

Thread 1 (Thread 0x3ffb0935470 (LWP 35117)):
#0  0x000003ffadc6f1b0 in __GI___poll (fds=0x2aa30cf1e70, nfds=2,
timeout=<optimized out>) at ../sysdeps/unix/sysv/linux/poll.c:29
#1  0x000003ffaf2d2624 in  () at /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#2  0x000003ffaf2d2aa8 in g_main_loop_run () at
/usr/lib/s390x-linux-gnu/libglib-2.0.so.0
#3  0x000002aa2f16b4ce in vhost_user_read
(dev=dev@entry=0x2aa31c07e98, msg=msg@entry=0x3fff4c7ddac) at
../../hw/virtio/vhost-user.c:402
#4  0x000002aa2f16c85e in vhost_user_get_u64
(dev=dev@entry=0x2aa31c07e98, request=request@entry=1,
u64=u64@entry=0x3fff4c7e0e8) at ../../hw/virtio/vhost-user.c:1334
#5  0x000002aa2f16e378 in vhost_user_get_features
(features=0x3fff4c7e0e8, dev=0x2aa31c07e98) at
../../hw/virtio/vhost-user.c:1356
#6  0x000002aa2f16e378 in vhost_user_backend_init (dev=0x2aa31c07e98,
opaque=0x2aa31c08110, errp=0x3fff4c7e420) at
../../hw/virtio/vhost-user.c:1877
#7  0x000002aa2f08c354 in vhost_dev_init
(hdev=hdev@entry=0x2aa31c07e98, opaque=opaque@entry=0x2aa31c08110,
backend_type=backend_type@entry=VHOST_BACKEND_TYPE_USER,
busyloop_timeout=busyloop_timeout@entry=0,
errp=errp@entry=0x3fff4c7e420) at ../../hw/virtio/vhost.c:1302
#8  0x000002aa2f11721a in vhost_user_blk_connect
(dev=dev@entry=0x2aa31c07c48, errp=errp@entry=0x3fff4c7e420) at
../../hw/block/vhost-user-blk.c:339
#9  0x000002aa2f11771c in vhost_user_blk_realize_connect
(errp=0x3fff4c7e420, s=0x2aa31c07c48) at
../../hw/block/vhost-user-blk.c:440
#10 0x000002aa2f11771c in vhost_user_blk_device_realize
(dev=0x2aa31c07c48, errp=0x3fff4c7e420) at
../../hw/block/vhost-user-blk.c:513
#11 0x000002aa2f065078 in virtio_device_realize (dev=0x2aa31c07c48,
errp=0x3fff4c7e4e0) at ../../hw/virtio/virtio.c:3659
#12 0x000002aa2f2569d0 in device_set_realized (obj=<optimized out>,
value=<optimized out>, errp=0x3fff4c7e640) at ../../hw/core/qdev.c:761
#13 0x000002aa2f17c73a in property_set_bool
(obj=obj@entry=0x2aa31c07c48, v=v@entry=0x2aa31c515b0,
name=name@entry=0x2aa2f54928c "realized", opaque=0x2aa309ee830,
errp=errp@entry=0x3fff4c7e640) at ../../qom/object.c:2258
#14 0x000002aa2f17f2b4 in object_property_set
(obj=obj@entry=0x2aa31c07c48, name=name@entry=0x2aa2f54928c
"realized", v=v@entry=0x2aa31c515b0, errp=errp@entry=0x3fff4c7e930) at
../../qom/object.c:1403
#15 0x000002aa2f17c01c in object_property_set_qobject
(obj=obj@entry=0x2aa31c07c48, name=name@entry=0x2aa2f54928c
"realized", value=value@entry=0x2aa31c514f0,
errp=errp@entry=0x3fff4c7e930) at ../../qom/qom-qobject.c:28
#16 0x000002aa2f17f562 in object_property_set_bool (obj=0x2aa31c07c48,
name=0x2aa2f54928c "realized", value=<optimized out>,
errp=0x3fff4c7e930) at ../../qom/object.c:1473
#17 0x000002aa2ef3d27a in virtio_pci_realize (pci_dev=0x2aa31bffa50,
errp=0x3fff4c7e930) at ../../hw/virtio/virtio-pci.c:1907
#18 0x000002aa2ed95040 in pci_qdev_realize (qdev=0x2aa31bffa50,
errp=<optimized out>) at ../../hw/pci/pci.c:2148
#19 0x000002aa2f2569d0 in device_set_realized (obj=<optimized out>,
value=<optimized out>, errp=0x3fff4c7eb98) at ../../hw/core/qdev.c:761
#20 0x000002aa2f17c73a in property_set_bool
(obj=obj@entry=0x2aa31bffa50, v=v@entry=0x2aa31c4b500,
name=name@entry=0x2aa2f54928c "realized", opaque=0x2aa309ee830,
errp=errp@entry=0x3fff4c7eb98) at ../../qom/object.c:2258
#21 0x000002aa2f17f2b4 in object_property_set
(obj=obj@entry=0x2aa31bffa50, name=name@entry=0x2aa2f54928c
"realized", v=v@entry=0x2aa31c4b500, errp=errp@entry=0x2aa2fd583b8
<error_fatal>) at ../../qom/object.c:1403
#22 0x000002aa2f17c01c in object_property_set_qobject
(obj=obj@entry=0x2aa31bffa50, name=name@entry=0x2aa2f54928c
"realized", value=value@entry=0x2aa31c4b480,
errp=errp@entry=0x2aa2fd583b8 <error_fatal>) at
../../qom/qom-qobject.c:28
#23 0x000002aa2f17f562 in object_property_set_bool (obj=0x2aa31bffa50,
name=name@entry=0x2aa2f54928c "realized", value=value@entry=true,
errp=0x2aa2fd583b8 <error_fatal>, errp@entry=<error reading variable:
value has been optimized out>) at ../../qom/object.c:1473
#24 0x000002aa2f2555fe in qdev_realize (dev=<optimized out>,
bus=bus@entry=0x2aa31154980, errp=<error reading variable: value has
been optimized out>) at ../../hw/core/qdev.c:389
#25 0x000002aa2eda6ca4 in qdev_device_add (opts=<optimized out>,
errp=<optimized out>, errp@entry=0x2aa2fd583b8 <error_fatal>) at
../../softmmu/qdev-monitor.c:673
#26 0x000002aa2f10c23e in device_init_func (opaque=<optimized out>,
opts=<optimized out>, errp=0x2aa2fd583b8 <error_fatal>) at
../../softmmu/vl.c:1187
#27 0x000002aa2f389426 in qemu_opts_foreach (list=<optimized out>,
func=func@entry=0x2aa2f10c220 <device_init_func>,
opaque=opaque@entry=0x0, errp=0x2aa2fd583b8 <error_fatal>) at
../../util/qemu-option.c:1135
#28 0x000002aa2f10f1fe in qemu_create_cli_devices () at ../../softmmu/vl.c:2645
#29 0x000002aa2f10f1fe in qmp_x_exit_preconfig (errp=<optimized out>)
at ../../softmmu/vl.c:2693
#30 0x000002aa2f112586 in qemu_init (argc=<optimized out>,
argv=<optimized out>, envp=<optimized out>) at ../../softmmu/vl.c:3713
#31 0x000002aa2ecff442 in main (argc=<optimized out>, argv=<optimized
out>, envp=<optimized out>) at ../../softmmu/main.c:49

-- PMM


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: intermittent failure, s390 host, x86-64 guest, vhost-user-blk test
  2021-08-04 14:33   ` Peter Maydell
@ 2021-08-05 17:39     ` Peter Maydell
  2021-08-06 10:36       ` Peter Maydell
  0 siblings, 1 reply; 6+ messages in thread
From: Peter Maydell @ 2021-08-05 17:39 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Kevin Wolf, QEMU Developers, Raphael Norwitz, Stefan Hajnoczi,
	Paolo Bonzini, Max Reitz, Eric Blake, Stefano Garzarella

On Wed, 4 Aug 2021 at 15:33, Peter Maydell <peter.maydell@linaro.org> wrote:
>
> On Wed, 4 Aug 2021 at 14:43, Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Wed, Aug 04, 2021 at 01:40:37PM +0100, Peter Maydell wrote:
> > > Saw this intermittent as part of my ad-hoc CI test setup.
> > > The backtrace says that the QEMU process has somehow hung
> > > during 'realize' of the vhost-user device...
> >
> > Hmm seems to have to do with the daemon not responding ...
> > Cc a bunch more people ...
>
> Here's another one; same thing, but a ppc64 guest, I think.

Ran into this again; I'll spare you the backtraces but it was the
same 'hang in realize', this one for qemu-system-i386. Same s390 host.
This seems to be a fairly frequent intermittent -- can somebody
have a look at it ?

-- PMM


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: intermittent failure, s390 host, x86-64 guest, vhost-user-blk test
  2021-08-05 17:39     ` Peter Maydell
@ 2021-08-06 10:36       ` Peter Maydell
  0 siblings, 0 replies; 6+ messages in thread
From: Peter Maydell @ 2021-08-06 10:36 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Kevin Wolf, QEMU Developers, Raphael Norwitz, Stefan Hajnoczi,
	Paolo Bonzini, Max Reitz, Eric Blake, Stefano Garzarella

On Thu, 5 Aug 2021 at 18:39, Peter Maydell <peter.maydell@linaro.org> wrote:
>
> On Wed, 4 Aug 2021 at 15:33, Peter Maydell <peter.maydell@linaro.org> wrote:
> >
> > On Wed, 4 Aug 2021 at 14:43, Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Wed, Aug 04, 2021 at 01:40:37PM +0100, Peter Maydell wrote:
> > > > Saw this intermittent as part of my ad-hoc CI test setup.
> > > > The backtrace says that the QEMU process has somehow hung
> > > > during 'realize' of the vhost-user device...
> > >
> > > Hmm seems to have to do with the daemon not responding ...
> > > Cc a bunch more people ...
> >
> > Here's another one; same thing, but a ppc64 guest, I think.
>
> Ran into this again; I'll spare you the backtraces but it was the
> same 'hang in realize', this one for qemu-system-i386. Same s390 host.
> This seems to be a fairly frequent intermittent -- can somebody
> have a look at it ?

I think I have also now seen this hang on aarch64 host:

pm       2507739  0.0  0.0   7300  3104 ?        S    09:29   0:00
             \_ bash -o pipefail -c echo
'MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
QTEST_QEMU_IMG=./qemu-img
G_TEST_DBUS_DAEMON=/home/pm/qemu/tests/dbus-vmstate-daemon.sh
QTEST_QEMU_BINARY=./qemu-system-i386
QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
tests/qtest/qos-test --tap -k' &&
MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
QTEST_QEMU_IMG=./qemu-img
G_TEST_DBUS_DAEMON=/home/pm/qemu/tests/dbus-vmstate-daemon.sh
QTEST_QEMU_BINARY=./qemu-system-i386
QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
tests/qtest/qos-test --tap -k -m quick < /dev/null |
./scripts/tap-driver.pl --test-name="qtest-i386/qos-test"
pm       2507744  0.0  0.0  88952  8292 ?        Sl   09:29   0:00
                 \_ tests/qtest/qos-test --tap -k -m quick
pm       2515688  0.0  0.0 333288 21044 ?        Sl   09:30   0:00
                 |   \_ ./storage-daemon/qemu-storage-daemon
--blockdev driver=file,node-name=disk0,filename=qtest.EjWt12 --export
type=vhost-user-blk,id=disk0,addr.type=unix,addr.path=/tmp/qtest-2507744-sock.s5wii0,node-name=disk0,writable=on,num-queues=1
pm       2515689  0.0  0.0 869272 65868 ?        Sl   09:30   0:00
                 |   \_ ./qemu-system-i386 -qtest
unix:/tmp/qtest-2507744.sock -qtest-log /dev/null -chardev
socket,path=/tmp/qtest-2507744.qmp,id=char0 -mon
chardev=char0,mode=control -display none -M pc -device
vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
memory-backend-memfd,id=mem,size=256M,share=on -M memory-backend=mem
-m 256M -chardev socket,id=char1,path=/tmp/qtest-2507744-sock.s5wii0
-accel qtest
pm       2507745  0.0  0.0  17404 11244 ?        S    09:29   0:00
                 \_ perl ./scripts/tap-driver.pl
--test-name=qtest-i386/qos-test


Not 100% sure it's the same hang, because the machine doesn't let
me do ptrace-attach-to-process, but it seems likely.

-- PMM


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: intermittent failure, s390 host, x86-64 guest, vhost-user-blk test
  2021-08-04 13:43 ` Michael S. Tsirkin
  2021-08-04 14:33   ` Peter Maydell
@ 2021-08-27 16:53   ` Raphael Norwitz
  1 sibling, 0 replies; 6+ messages in thread
From: Raphael Norwitz @ 2021-08-27 16:53 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Kevin Wolf, Peter Maydell, QEMU Developers, Raphael Norwitz,
	Stefan Hajnoczi, Paolo Bonzini, Max Reitz, Eric Blake,
	Stefano Garzarella

On Wed, Aug 04, 2021 at 09:43:25AM -0400, Michael S. Tsirkin wrote:
> On Wed, Aug 04, 2021 at 01:40:37PM +0100, Peter Maydell wrote:
> > Saw this intermittent as part of my ad-hoc CI test setup.
> > The backtrace says that the QEMU process has somehow hung
> > during 'realize' of the vhost-user device...
> 
> Hmm seems to have to do with the daemon not responding ...
> Cc a bunch more people ...

On a recent master with the meson dependency fix (commit ID:
cc1838c25d55e7f478cd493431679337e24e1b72) I’ve been able to reproduce
this quite easily with the following command from the make check-qtest
V=1 output.

#!/bin/sh

 I="0"
 while [ true ] 
 do 
 echo "=== iteration $I ===" 

 ALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))} \
     QTEST_QEMU_IMG=./qemu-img \
     G_TEST_DBUS_DAEMON=/home/raphael/pub_src/qemu_checkouts/qemu/tests/dbus-vmstate-daemon.sh \
     QTEST_QEMU_BINARY=./qemu-system-x86_64 \
     QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon \
     tests/qtest/qos-test --tap -k 

I=$((I+1)) 
done 

I typically hit it within 10 iterations.

Looking at the trace provided, we see that QEMU has sent a
VHOST_USER_GET_PROTOCOL_FEATURES message and is stuck waiting for a
reply. Meanwhile the storage-daemon is stuck waiting for a message. In
the vhost-user-blk-test, as of now there is nothing stoping
vhost-user-blk in QEMU writing to the socket right after forking off the
storage daemon before it has a chance to come up properly, leaving the
test hanging forever. I’ve seen this happen before with other
backends/device types. I added a one second sleep to the test case
after the test forks off the storage daemon, and it resolved the hang
completely for me. I ran the test for 1000+ iterations and couldn’t
reproduce the failure with the sleep.

I just mailed some patches which pipe the stdout of the storage-daemon
back to he vhost-user-blk-test. The storage-daemon prints a log when
it is fully initialized which wakes up vhost-user-blk-test. This
should reliably resolve the hang.

I’m thinking we could also change the vhost-user code to timeout (or
possibly retry?) if the first VHOST_USER_GET_PROTOCOL_FEATURES
message doesn’t receive a response, as it’s better to fail power-on then
hang forever. If others agree I can pull together an RFC.

> 
> > Process tree:
> > 
> > ubuntu   59552  0.0  0.0   5460  3120 ?        S    06:51   0:00
> >            \_ bash -o pipefail -c echo
> > 'MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
> > QTEST_QEMU_IMG=./qemu-img
> > G_TEST_DBUS_DAEMON=/home/ubuntu/qemu/tests/dbus-vmstate-daemon.sh
> > QTEST_QEMU_BINARY=./qemu-system-x86_64
> > QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
> > tests/qtest/qos-test --tap -k' &&
> > MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
> > QTEST_QEMU_IMG=./qemu-img
> > G_TEST_DBUS_DAEMON=/home/ubuntu/qemu/tests/dbus-vmstate-daemon.sh
> > QTEST_QEMU_BINARY=./qemu-system-x86_64
> > QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
> > tests/qtest/qos-test --tap -k -m quick < /dev/null |
> > ./scripts/tap-driver.pl --test-name="qtest-x86_64/qos-test"
> > ubuntu   59553  0.0  0.2  88008  8332 ?        Sl   06:51   0:00
> >                \_ tests/qtest/qos-test --tap -k -m quick
> > ubuntu   60122  0.0  0.5 254432 21516 ?        Sl   06:51   0:00
> >                |   \_ ./storage-daemon/qemu-storage-daemon --blockdev
> > driver=file,node-name=disk0,filename=qtest.trJnuR --export
> > type=vhost-user-blk,id=disk0,addr.type=unix,addr.path=/tmp/qtest-59553-sock.iRt6WL,node-name=disk0,writable=on,num-queues=1
> > ubuntu   60123  0.0  1.6 747952 62732 ?        Sl   06:51   0:00
> >                |   \_ ./qemu-system-x86_64 -qtest
> > unix:/tmp/qtest-59553.sock -qtest-log /dev/null -chardev
> > socket,path=/tmp/qtest-59553.qmp,id=char0 -mon
> > chardev=char0,mode=control -display none -M pc -device
> > vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
> > memory-backend-memfd,id=mem,size=256M,share=on -M memory-backend=mem
> > -m 256M -chardev socket,id=char1,path=/tmp/qtest-59553-sock.iRt6WL
> > -accel qtest
> > ubuntu   59554  0.0  0.2  14116 11204 ?        S    06:51   0:00
> >                \_ perl ./scripts/tap-driver.pl
> > --test-name=qtest-x86_64/qos-test
> > 
> > Backtrace, qemu process:
> > 
> > Thread 4 (Thread 0x3ff5be1d910 (LWP 60131)):
> > #0  0x000003ff7e2bed6a in __GI___sigtimedwait (set=<optimized out>,
> >     set@entry=0x3ff5be1a098, info=info@entry=0x3ff5be19f68,
> > timeout=timeout@entry=0x0)
> >     at ../sysdeps/unix/sysv/linux/sigtimedwait.c:42
> > #1  0x000003ff7e492ca8 in __sigwait (set=set@entry=0x3ff5be1a098,
> > sig=sig@entry=0x3ff5be1a094)
> >     at ../sysdeps/unix/sysv/linux/sigwait.c:28
> > #2  0x000002aa16891ab2 in dummy_cpu_thread_fn
> > (arg=arg@entry=0x2aa182103e0) at ../../accel/dummy-cpus.c:46
> > #3  0x000002aa16b22f5a in qemu_thread_start (args=<optimized out>) at
> > ../../util/qemu-thread-posix.c:541
> > #4  0x000003ff7e487aa8 in start_thread (arg=0x3ff5be1d910) at
> > pthread_create.c:463
> > #5  0x000003ff7e37a11e in thread_start () at
> > ../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65
> > 
> > Thread 3 (Thread 0x3ff608b4910 (LWP 60130)):
> > #0  0x000003ff7e36f1b0 in __GI___poll (fds=0x3ff540031f0, nfds=3,
> > timeout=<optimized out>) at ../sysdeps/unix/sysv/linux/poll.c:29
> > #1  0x000003ff7f9d2624 in  () at /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #2  0x000003ff7f9d2aa8 in g_main_loop_run () at
> > /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #3  0x000002aa1695ad26 in iothread_run
> > (opaque=opaque@entry=0x2aa17ea2b80) at ../../iothread.c:73
> > #4  0x000002aa16b22f5a in qemu_thread_start (args=<optimized out>) at
> > ../../util/qemu-thread-posix.c:541
> > #5  0x000003ff7e487aa8 in start_thread (arg=0x3ff608b4910) at
> > pthread_create.c:463
> > #6  0x000003ff7e37a11e in thread_start () at
> > ../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65
> > 
> > Thread 2 (Thread 0x3ff711b7910 (LWP 60129)):
> > #0  0x000003ff7e4911f6 in __lll_lock_wait
> > (futex=futex@entry=0x2aa17466bb8 <qemu_global_mutex>,
> > private=<optimized out>)
> >     at lowlevellock.c:46
> > #1  0x000003ff7e48a204 in __GI___pthread_mutex_lock
> > (mutex=mutex@entry=0x2aa17466bb8 <qemu_global_mutex>)
> >     at ../nptl/pthread_mutex_lock.c:80
> > #2  0x000002aa16b23116 in qemu_mutex_lock_impl (mutex=0x2aa17466bb8
> > <qemu_global_mutex>, file=0x2aa170a07d8 "../../util/rcu.c",
> > line=<optimized out>) at ../../util/qemu-thread-posix.c:79
> > #3  0x000002aa168221c0 in qemu_mutex_lock_iothread_impl
> > (file=file@entry=0x2aa170a07d8 "../../util/rcu.c",
> > line=line@entry=266)
> >     at ../../softmmu/cpus.c:493
> > #4  0x000002aa16b4c8aa in call_rcu_thread (opaque=opaque@entry=0x0) at
> > ../../util/rcu.c:266
> > #5  0x000002aa16b22f5a in qemu_thread_start (args=<optimized out>) at
> > ../../util/qemu-thread-posix.c:541
> > #6  0x000003ff7e487aa8 in start_thread (arg=0x3ff711b7910) at
> > pthread_create.c:463
> > #7  0x000003ff7e37a11e in thread_start () at
> > ../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65
> > 
> > Thread 1 (Thread 0x3ff81035470 (LWP 60123)):
> > #0  0x000003ff7e36f1b0 in __GI___poll (fds=0x2aa18147da0, nfds=2,
> > timeout=<optimized out>) at ../sysdeps/unix/sysv/linux/poll.c:29
> > #1  0x000003ff7f9d2624 in  () at /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #2  0x000003ff7f9d2aa8 in g_main_loop_run () at
> > /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #3  0x000002aa169104de in vhost_user_read
> > (dev=dev@entry=0x2aa18f7afc8, msg=msg@entry=0x3ffc167ddfc)
> >     at ../../hw/virtio/vhost-user.c:402
> > #4  0x000002aa1691186e in vhost_user_get_u64
> > (dev=dev@entry=0x2aa18f7afc8, request=request@entry=1,
> > u64=u64@entry=0x3ffc167e138)
> >     at ../../hw/virtio/vhost-user.c:1334
> > #5  0x000002aa16913388 in vhost_user_get_features
> > (features=0x3ffc167e138, dev=0x2aa18f7afc8) at
> > ../../hw/virtio/vhost-user.c:1356
> > #6  0x000002aa16913388 in vhost_user_backend_init (dev=0x2aa18f7afc8,
> > opaque=0x2aa18f7b240, errp=0x3ffc167e470)
> >     at ../../hw/virtio/vhost-user.c:1877
> > #7  0x000002aa1684030c in vhost_dev_init
> > (hdev=hdev@entry=0x2aa18f7afc8, opaque=opaque@entry=0x2aa18f7b240,
> > backend_type=backend_type@entry=VHOST_BACKEND_TYPE_USER,
> > busyloop_timeout=busyloop_timeout@entry=0,
> > errp=errp@entry=0x3ffc167e470)
> >     at ../../hw/virtio/vhost.c:1302
> > #8  0x000002aa168be742 in vhost_user_blk_connect
> > (dev=dev@entry=0x2aa18f7ad78, errp=errp@entry=0x3ffc167e470)
> >     at ../../hw/block/vhost-user-blk.c:339
> > #9  0x000002aa168bec44 in vhost_user_blk_realize_connect
> > (errp=0x3ffc167e470, s=0x2aa18f7ad78) at
> > ../../hw/block/vhost-user-blk.c:440
> > #10 0x000002aa168bec44 in vhost_user_blk_device_realize
> > (dev=0x2aa18f7ad78, errp=0x3ffc167e470)
> >     at ../../hw/block/vhost-user-blk.c:513
> > #11 0x000002aa16819e78 in virtio_device_realize (dev=0x2aa18f7ad78,
> > errp=0x3ffc167e530) at ../../hw/virtio/virtio.c:3659
> > #12 0x000002aa169fcc50 in device_set_realized (obj=<optimized out>,
> > value=<optimized out>, errp=0x3ffc167e690)
> >     at ../../hw/core/qdev.c:761
> > #13 0x000002aa169229ba in property_set_bool
> > (obj=obj@entry=0x2aa18f7ad78, v=v@entry=
> >     0x2aa18fc41e0, name=name@entry=0x2aa16ccfd74 "realized",
> > opaque=0x2aa17f2a7a0, errp=errp@entry=0x3ffc167e690)
> >     at ../../qom/object.c:2258
> > #14 0x000002aa16925534 in object_property_set
> > (obj=obj@entry=0x2aa18f7ad78, name=name@entry=0x2aa16ccfd74
> > "realized", v=v@entry=
> >     0x2aa18fc41e0, errp=errp@entry=0x3ffc167e980) at ../../qom/object.c:1403
> > #15 0x000002aa1692229c in object_property_set_qobject
> > (obj=obj@entry=0x2aa18f7ad78, name=name@entry=0x2aa16ccfd74
> > "realized", value=value@entry=0x2aa18fc4120,
> > errp=errp@entry=0x3ffc167e980) at ../../qom/qom-qobject.c:28
> > #16 0x000002aa169257e2 in object_property_set_bool (obj=0x2aa18f7ad78,
> > name=0x2aa16ccfd74 "realized", value=<optimized out>,
> > errp=0x3ffc167e980) at ../../qom/object.c:1473
> > #17 0x000002aa1677e9c2 in virtio_pci_realize (pci_dev=0x2aa18f72b80,
> > errp=0x3ffc167e980) at ../../hw/virtio/virtio-pci.c:1907
> > #18 0x000002aa165dfc48 in pci_qdev_realize (qdev=0x2aa18f72b80,
> > errp=<optimized out>) at ../../hw/pci/pci.c:2148
> > #19 0x000002aa169fcc50 in device_set_realized (obj=<optimized out>,
> > value=<optimized out>, errp=0x3ffc167ebe8)
> >     at ../../hw/core/qdev.c:761
> > #20 0x000002aa169229ba in property_set_bool
> > (obj=obj@entry=0x2aa18f72b80, v=v@entry=
> >     0x2aa18fbe450, name=name@entry=0x2aa16ccfd74 "realized",
> > opaque=0x2aa17f2a7a0, errp=errp@entry=0x3ffc167ebe8)
> >     at ../../qom/object.c:2258
> > #21 0x000002aa16925534 in object_property_set
> > (obj=obj@entry=0x2aa18f72b80, name=name@entry=0x2aa16ccfd74
> > "realized", v=v@entry=
> >     0x2aa18fbe450, errp=errp@entry=0x2aa17484fe0 <error_fatal>) at
> > ../../qom/object.c:1403
> > #22 0x000002aa1692229c in object_property_set_qobject
> > (obj=obj@entry=0x2aa18f72b80, name=name@entry=0x2aa16ccfd74
> > "realized", value=value@entry=0x2aa18fbe580,
> > errp=errp@entry=0x2aa17484fe0 <error_fatal>) at
> > ../../qom/qom-qobject.c:28
> > #23 0x000002aa169257e2 in object_property_set_bool (obj=0x2aa18f72b80,
> > name=name@entry=0x2aa16ccfd74 "realized", value=value@entry=true,
> > errp=0x2aa17484fe0 <error_fatal>, errp@entry=<error reading variable:
> > value has been optimized out>) at ../../qom/object.c:1473
> > #24 0x000002aa169fb87e in qdev_realize (dev=<optimized out>,
> > bus=bus@entry=0x2aa18384200, errp=<error reading variable: value has
> > been optimized out>) at ../../hw/core/qdev.c:389
> > #25 0x000002aa165f1c6c in qdev_device_add (opts=<optimized out>,
> > errp=<optimized out>, errp@entry=0x2aa17484fe0 <error_fatal>)
> >     at ../../softmmu/qdev-monitor.c:673
> > #26 0x000002aa168b2086 in device_init_func (opaque=<optimized out>,
> > opts=<optimized out>, errp=0x2aa17484fe0 <error_fatal>)
> >     at ../../softmmu/vl.c:1187
> > #27 0x000002aa16b2b906 in qemu_opts_foreach (list=<optimized out>,
> > func=func@entry=0x2aa168b2068 <device_init_func>,
> > opaque=opaque@entry=0x0, errp=0x2aa17484fe0 <error_fatal>) at
> > ../../util/qemu-option.c:1135
> > #28 0x000002aa168b5046 in qemu_create_cli_devices () at ../../softmmu/vl.c:2645
> > #29 0x000002aa168b5046 in qmp_x_exit_preconfig (errp=<optimized out>)
> > at ../../softmmu/vl.c:2693
> > #30 0x000002aa168b83ce in qemu_init (argc=<optimized out>,
> > argv=<optimized out>, envp=<optimized out>) at ../../softmmu/vl.c:3713
> > #31 0x000002aa1654f6da in main (argc=<optimized out>, argv=<optimized
> > out>, envp=<optimized out>) at ../../softmmu/main.c:49
> > 
> > 
> > Backtrace, qos-test:
> > 
> > Thread 2 (Thread 0x3ff9de7f910 (LWP 59555)):
> > #0  syscall () at ../sysdeps/unix/sysv/linux/s390/s390-64/syscall.S:53
> > #1  0x000002aa20c36a54 in qemu_futex_wait (val=<optimized out>,
> > f=<optimized out>) at /home/ubuntu/qemu/include/qemu/futex.h:29
> > #2  qemu_event_wait (ev=ev@entry=0x2aa20cdc3a0 <rcu_call_ready_event>)
> > at ../../util/qemu-thread-posix.c:480
> > #3  0x000002aa20c4efa4 in call_rcu_thread (opaque=opaque@entry=0x0) at
> > ../../util/rcu.c:258
> > #4  0x000002aa20c35a6a in qemu_thread_start (args=<optimized out>) at
> > ../../util/qemu-thread-posix.c:541
> > #5  0x000003ff9e887aa8 in start_thread (arg=0x3ff9de7f910) at
> > pthread_create.c:463
> > #6  0x000003ff9e77a11e in thread_start () at
> > ../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65
> > 
> > Thread 1 (Thread 0x3ff9eef0940 (LWP 59553)):
> > #0  0x000003ff9e89165a in __libc_read (fd=<optimized out>,
> > buf=buf@entry=0x3ffc10fd3e0, nbytes=nbytes@entry=1024)
> >     at ../sysdeps/unix/sysv/linux/read.c:27
> > #1  0x000002aa20bdee7a in read (__nbytes=1024, __buf=0x3ffc10fd3e0,
> > __fd=<optimized out>)
> >     at /usr/include/s390x-linux-gnu/bits/unistd.h:44
> > #2  qtest_client_socket_recv_line (s=0x2aa22379fc0) at
> > ../../tests/qtest/libqtest.c:494
> > #3  0x000002aa20bdf032 in qtest_rsp_args (s=s@entry=0x2aa22379fc0,
> > expected_args=expected_args@entry=1)
> >     at ../../tests/qtest/libqtest.c:521
> > #4  0x000002aa20bdf6f2 in qtest_query_target_endianness
> > (s=0x2aa22379fc0) at ../../tests/qtest/libqtest.c:570
> > #5  0x000002aa20be0312 in qtest_init_without_qmp_handshake
> > (extra_args=<optimized out>) at ../../tests/qtest/libqtest.c:332
> > #6  0x000002aa20be09e2 in qtest_init (extra_args=<optimized out>) at
> > ../../tests/qtest/libqtest.c:339
> > #7  0x000002aa20bc7b76 in qtest_start (
> >     args=0x2aa22389ef0 "-M pc  -device
> > vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
> > memory-backend-memfd,id=mem,size=256M,share=on  -M memory-backend=mem
> > -m 256M -chardev socket,id=char1,path=/tmp/qtest-59553-so"...)
> >     at ../../tests/qtest/libqtest-single.h:29
> > #8  restart_qemu_or_continue (
> >     path=0x2aa22389ef0 "-M pc  -device
> > vhost-user-blk-pci,id=drv0,chardev=char1,addr=4.0 -object
> > memory-backend-memfd,id=mem,size=256M,share=on  -M memory-backend=mem
> > -m 256M -chardev socket,id=char1,path=/tmp/qtest-59553-so"...) at
> > ../../tests/qtest/qos-test.c:105
> > #9  run_one_test (arg=<optimized out>) at ../../tests/qtest/qos-test.c:178
> > #10 0x000003ff9eb7c70c in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #11 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #12 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #13 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #14 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #15 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #16 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #17 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #18 0x000003ff9eb7c648 in ?? () from /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #19 0x000003ff9eb7c8ee in g_test_run_suite () from
> > /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #20 0x000003ff9eb7c928 in g_test_run () from
> > /usr/lib/s390x-linux-gnu/libglib-2.0.so.0
> > #21 0x000002aa20bc6c9a in main (argc=<optimized out>, argv=<optimized
> > out>, envp=<optimized out>) at ../../tests/qtest/qos-test.c:338
> > 
> > Backtrace, qemu-storage-daemon:
> > 
> > Thread 2 (Thread 0x3ff951b7910 (LWP 60132)):
> > #0  syscall () at ../sysdeps/unix/sysv/linux/s390/s390-64/syscall.S:53
> > #1  0x000002aa0184fa6c in qemu_futex_wait (val=<optimized out>,
> > f=<optimized out>) at /home/ubuntu/qemu/include/qemu/futex.h:29
> > #2  qemu_event_wait (ev=ev@entry=0x2aa01971158 <rcu_call_ready_event>)
> > at ../../util/qemu-thread-posix.c:480
> > #3  0x000002aa0187498c in call_rcu_thread (opaque=opaque@entry=0x0) at
> > ../../util/rcu.c:258
> > #4  0x000002aa0184ea82 in qemu_thread_start (args=<optimized out>) at
> > ../../util/qemu-thread-posix.c:541
> > #5  0x000003ffa0507aa8 in start_thread (arg=0x3ff951b7910) at
> > pthread_create.c:463
> > #6  0x000003ffa03fa11e in thread_start () at
> > ../sysdeps/unix/sysv/linux/s390/s390-64/clone.S:65
> > 
> > Thread 1 (Thread 0x3ffa14590e0 (LWP 60122)):
> > #0  0x000003ffa03ef302 in __GI_ppoll (fds=0x2aa0332b3d0, nfds=5,
> > timeout=<optimized out>, timeout@entry=0x0,
> >     sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:39
> > #1  0x000002aa0185b054 in ppoll (__ss=0x0, __timeout=0x0,
> > __nfds=<optimized out>, __fds=<optimized out>)
> >     at /usr/include/s390x-linux-gnu/bits/poll2.h:77
> > #2  qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>,
> > timeout=timeout@entry=-1) at ../../util/qemu-timer.c:336
> > #3  0x000002aa01851e48 in os_host_main_loop_wait (timeout=-1) at
> > ../../util/main-loop.c:250
> > #4  main_loop_wait (nonblocking=<optimized out>) at ../../util/main-loop.c:531
> > #5  0x000002aa01738dda in main (argc=<optimized out>,
> > argv=0x3ffd28feba8) at ../../storage-daemon/qemu-storage-daemon.c:345
> > 
> > 
> > thanks
> > -- PMM
> 
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-08-27 16:55 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-04 12:40 intermittent failure, s390 host, x86-64 guest, vhost-user-blk test Peter Maydell
2021-08-04 13:43 ` Michael S. Tsirkin
2021-08-04 14:33   ` Peter Maydell
2021-08-05 17:39     ` Peter Maydell
2021-08-06 10:36       ` Peter Maydell
2021-08-27 16:53   ` Raphael Norwitz

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.