From: Vladimir Sementsov-Ogievskiy
Date: Tue, 19 Jul 2016 14:58:52 +0300
Subject: Re: [Qemu-devel] [PATCH] backup: block-job error BUG
To: Fam Zheng
Cc: qemu-block@nongnu.org, kwolf@redhat.com, jcody@redhat.com, qemu-devel, mreitz@redhat.com, stefanha@redhat.com, den@openvz.org

On 19.07.2016 05:12, Fam Zheng wrote:
> On Tue, 07/19 00:26, Vladimir Sementsov-Ogievskiy wrote:
>> Forgot qemu-devel@nongnu.org... adding it.
>>
>> On 19.07.2016 00:22, Vladimir Sementsov-Ogievskiy wrote:
>>> Hi all!
>>>
>>> This is a variant of an existing test case which produces a test
>>> failure.
>>>
>>> It looks like the reason is:
>>>
>>> one block job (the "success job") is in backup_complete, in a
>>> synchronous bdrv_flush; the other job (the one with the injected I/O
>>> error) tries to synchronously cancel the "success job". It looks
>>> like some kind of deadlock.
>>>
>>> -..........
>>> +........EEE
>>> +======================================================================
>>> +ERROR: test_transaction_failure (__main__.TestIncrementalBackup)
>>> +Test: Verify backups made from a transaction that partially fails.
>>> +----------------------------------------------------------------------
>>> +Traceback (most recent call last):
>>> +  File "124", line 478, in test_transaction_failure
>>> +    self.wait_qmp_backup_cancelled(drive0['id'])
>>> +  File "124", line 173, in wait_qmp_backup_cancelled
>>> +    match={'data': {'device': device}})
>>> +  File "/work/src/qemu/tests/qemu-iotests/iotests.py", line 308, in event_wait
>>> +    event = self._qmp.pull_event(wait=timeout)
>>> +  File "/work/src/qemu/tests/qemu-iotests/../../scripts/qmp/qmp.py", line 194, in pull_event
>>> +    self.__get_events(wait)
>>> +  File "/work/src/qemu/tests/qemu-iotests/../../scripts/qmp/qmp.py", line 109, in __get_events
>>> +    raise QMPTimeoutError("Timeout waiting for event")
>>> +QMPTimeoutError: Timeout waiting for event
>>> +
>>> +======================================================================
>>> +ERROR: test_transaction_failure (__main__.TestIncrementalBackup)
>>> +Test: Verify backups made from a transaction that partially fails.
>>> +----------------------------------------------------------------------
>>> +Traceback (most recent call last):
>>> +  File "124", line 272, in tearDown
>>> +    self.vm.shutdown()
>>> +  File "/work/src/qemu/tests/qemu-iotests/iotests.py", line 260, in shutdown
>>> +    self._qmp.cmd('quit')
>>> +  File "/work/src/qemu/tests/qemu-iotests/../../scripts/qmp/qmp.py", line 172, in cmd
>>> +    return self.cmd_obj(qmp_cmd)
>>> +  File "/work/src/qemu/tests/qemu-iotests/../../scripts/qmp/qmp.py", line 157, in cmd_obj
>>> +    return self.__json_read()
>>> +  File "/work/src/qemu/tests/qemu-iotests/../../scripts/qmp/qmp.py", line 66, in __json_read
>>> +    data = self.__sockfile.readline()
>>> +  File "/usr/lib64/python2.7/socket.py", line 447, in readline
>>> +    data = self._sock.recv(self._rbufsize)
>>> +timeout: timed out
>>> +
>>> +======================================================================
>>> +ERROR: test_incremental_failure (__main__.TestIncrementalBackupBlkdebug)
>>> +Test: Verify backups made after a failure are correct.
>>> +----------------------------------------------------------------------
>>> +Traceback (most recent call last):
>>> +  File "124", line 549, in setUp
>>> +    self.vm.launch()
>>> +  File "/work/src/qemu/tests/qemu-iotests/iotests.py", line 246, in launch
>>> +    self._qmp = qmp.QEMUMonitorProtocol(self._monitor_path, server=True)
>>> +  File "/work/src/qemu/tests/qemu-iotests/../../scripts/qmp/qmp.py", line 44, in __init__
>>> +    self.__sock.bind(self.__address)
>>> +  File "/usr/lib64/python2.7/socket.py", line 224, in meth
>>> +    return getattr(self._sock,name)(*args)
>>> +error: [Errno 98] Address already in use
>>> +
>>> ----------------------------------------------------------------------
>>> Ran 10 tests
>>>
>>> -OK
>>> +FAILED (errors=3)
>>> Failures: 124
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy
>>> ---
>>>  tests/qemu-iotests/124 | 6 +++---
>>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124
>>> index de7cdbe..74de117 100644
>>> --- a/tests/qemu-iotests/124
>>> +++ b/tests/qemu-iotests/124
>>> @@ -448,9 +448,9 @@ class TestIncrementalBackup(TestIncrementalBackupBase):
>>>          self.assertFalse(self.vm.get_qmp_events(wait=False))
>>>
>>>          # Emulate some writes
>>> -        self.hmp_io_writes(drive0['id'], (('0xab', 0, 512),
>>> -                                          ('0xfe', '16M', '256k'),
>>> -                                          ('0x64', '32736k', '64k')))
>>> +        #self.hmp_io_writes(drive0['id'], (('0xab', 0, 512),
>>> +        #                                  ('0xfe', '16M', '256k'),
>>> +        #                                  ('0x64', '32736k', '64k')))
> This patch doesn't make any sense to me.
>
> If this is a deadlock in QEMU, shouldn't we fix the code instead of randomly
> commenting out a few lines in the test case?
>
> I cannot reproduce this on master. Could you post a backtrace where the block
> job is stopped?
>
> Fam

This patch is not for committing, but only to reproduce the bug.
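To illustrate the suspected pattern (this is just a minimal standalone
pthread sketch, not QEMU code: the names are made up, and in QEMU the
locks in question are the jobs' AioContexts, so the real cycle may look
different):

/*
 * Two "jobs" each hold their own context lock and then synchronously
 * wait for the other side's lock: a classic AB-BA deadlock.
 * Build and run: gcc -pthread deadlock_sketch.c && ./a.out
 * It prints both "acquired" lines and then hangs forever, by design.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t ctx_success = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t ctx_failed = PTHREAD_MUTEX_INITIALIZER;

/* Stands in for the "success job" finishing in backup_complete. */
static void *complete_success_job(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&ctx_success);   /* its own context */
    printf("success job: acquired own context, flushing...\n");
    sleep(1);                           /* force the bad interleaving */
    pthread_mutex_lock(&ctx_failed);    /* synchronous step needing the
                                         * other context: blocks forever */
    pthread_mutex_unlock(&ctx_failed);
    pthread_mutex_unlock(&ctx_success);
    return NULL;
}

/* Stands in for the failed job's abort path, which synchronously
 * cancels the other job of the transaction. */
static void *abort_failed_job(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&ctx_failed);    /* its own context */
    printf("failed job: acquired own context, cancelling sibling...\n");
    sleep(1);
    pthread_mutex_lock(&ctx_success);   /* cancel the sibling: blocks forever */
    pthread_mutex_unlock(&ctx_success);
    pthread_mutex_unlock(&ctx_failed);
    return NULL;
}

int main(void)
{
    pthread_t a, b;
    pthread_create(&a, NULL, complete_success_job, NULL);
    pthread_create(&b, NULL, abort_failed_job, NULL);
    pthread_join(a, NULL);              /* never returns */
    pthread_join(b, NULL);
    return 0;
}

Each side holds its own lock while synchronously waiting on the other,
which is the same shape as the hang below.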
With --enable-debug, the backtrace is:

Thread 3 (Thread 0x7ff6e5ac7700 (LWP 3300)):
#0  0x00007ff6eb7da469 in syscall () from /lib64/libc.so.6
#1  0x00007ff6f33f6845 in futex_wait (ev=0x7ff6f3e844a4, val=4294967295) at util/qemu-thread-posix.c:292
#2  0x00007ff6f33f69ae in qemu_event_wait (ev=0x7ff6f3e844a4) at util/qemu-thread-posix.c:399
#3  0x00007ff6f340bf80 in call_rcu_thread (opaque=0x0) at util/rcu.c:250
#4  0x00007ff6ee67fdc5 in start_thread () from /lib64/libpthread.so.0
#5  0x00007ff6eb7dfced in clone () from /lib64/libc.so.6

Thread 2 (Thread 0x7ff6e43bd700 (LWP 3302)):
#0  0x00007ff6ee686e91 in sigwait () from /lib64/libpthread.so.0
#1  0x00007ff6f2f9aace in qemu_dummy_cpu_thread_fn (arg=0x7ff6f42c2250) at /work/src/qemu/cpus.c:1123
#2  0x00007ff6ee67fdc5 in start_thread () from /lib64/libpthread.so.0
#3  0x00007ff6eb7dfced in clone () from /lib64/libc.so.6

Thread 1 (Thread 0x7ff6f2d34c00 (LWP 3299)):
#0  rfifolock_lock (r=0x7ff6f4280e00) at util/rfifolock.c:66
#1  0x00007ff6f3319dd8 in aio_context_acquire (ctx=0x7ff6f4280da0) at async.c:373
#2  0x00007ff6f33236ed in block_job_completed_txn_abort (job=0x7ff6f59e91c0) at blockjob.c:240
#3  0x00007ff6f332399d in block_job_completed (job=0x7ff6f59e91c0, ret=-5) at blockjob.c:295
#4  0x00007ff6f3106776 in backup_complete (job=0x7ff6f59e91c0, opaque=0x7ff6f6058660) at block/backup.c:288
#5  0x00007ff6f3324389 in block_job_defer_to_main_loop_bh (opaque=0x7ff6f62073d0) at blockjob.c:608
#6  0x00007ff6f3319477 in aio_bh_call (bh=0x7ff6f42f86d0) at async.c:66
#7  0x00007ff6f3319539 in aio_bh_poll (ctx=0x7ff6f4280da0) at async.c:94
#8  0x00007ff6f3326ae9 in aio_dispatch (ctx=0x7ff6f4280da0) at aio-posix.c:308
#9  0x00007ff6f3319907 in aio_ctx_dispatch (source=0x7ff6f4280da0, callback=0x0, user_data=0x0) at async.c:233
#10 0x00007ff6ec51d7aa in g_main_context_dispatch () from /lib64/libglib-2.0.so.0
#11 0x00007ff6f3324b19 in glib_pollfds_poll () at main-loop.c:213
#12 0x00007ff6f3324bf6 in os_host_main_loop_wait (timeout=0) at main-loop.c:258
#13 0x00007ff6f3324ca6 in main_loop_wait (nonblocking=1) at main-loop.c:506
#14 0x00007ff6f30d3faa in main_loop () at vl.c:1908
#15 0x00007ff6f30db97c in main (argc=16, argv=0x7ffec0b0e3b8, envp=0x7ffec0b0e440) at vl.c:4601

>
>>>          self.hmp_io_writes(drive1['id'], (('0xba', 0, 512),
>>>                                            ('0xef', '16M', '256k'),
>>>                                            ('0x46', '32736k', '64k')))
>>
>> --
>> Best regards,
>> Vladimir
>>
>>

--
Best regards,
Vladimir
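P.S. A backtrace like the one above can be captured by attaching gdb to
the hung process and dumping all threads; something like this works
(the binary name is an assumption, use whatever the test actually
starts):

gdb -p "$(pidof qemu-system-x86_64)" -ex 'thread apply all bt' -ex detach -ex quit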