* [PATCH v1 0/2] Add timeout mechanism to qmp actions
@ 2020-08-10 14:52 Zhenyu Ye
  2020-08-10 14:52 ` [PATCH v1 1/2] util: introduce aio_context_acquire_timeout Zhenyu Ye
                   ` (3 more replies)
  0 siblings, 4 replies; 28+ messages in thread
From: Zhenyu Ye @ 2020-08-10 14:52 UTC (permalink / raw)
  To: qemu-devel, qemu-block
  Cc: kwolf, fam, yezhenyu2, armbru, xiexiangyou, mreitz, stefanha, pbonzini

Before performing QMP actions, we need to lock the qemu_global_mutex,
so the QMP actions should not take too long.

Unfortunately, some QMP actions need to acquire an AioContext and
this may take a long time.  The VM will hit a soft lockup if this
takes too long.

So add a timeout mechanism for QMP actions.

Zhenyu Ye (2):
  util: introduce aio_context_acquire_timeout
  qmp: use aio_context_acquire_timeout replace aio_context_acquire

 block/qapi-sysemu.c         |  7 ++++++-
 block/qapi.c                |  6 +++++-
 blockdev.c                  | 35 ++++++++++++++++++++++++++++++-----
 include/block/aio.h         |  6 ++++++
 include/qemu/thread-posix.h |  1 +
 include/qemu/thread.h       |  1 +
 util/async.c                | 10 ++++++++++
 util/qemu-thread-posix.c    |  6 ++++++
 8 files changed, 65 insertions(+), 7 deletions(-)

-- 
2.22.0.windows.1





* [PATCH v1 1/2] util: introduce aio_context_acquire_timeout
  2020-08-10 14:52 [PATCH v1 0/2] Add timeout mechanism to qmp actions Zhenyu Ye
@ 2020-08-10 14:52 ` Zhenyu Ye
  2020-08-10 14:52 ` [PATCH v1 2/2] qmp: use aio_context_acquire_timeout replace aio_context_acquire Zhenyu Ye
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 28+ messages in thread
From: Zhenyu Ye @ 2020-08-10 14:52 UTC (permalink / raw)
  To: qemu-devel, qemu-block
  Cc: kwolf, fam, yezhenyu2, armbru, xiexiangyou, mreitz, stefanha, pbonzini

Currently we only have aio_context_acquire() to take ownership of
the AioContext.  If the mutex is locked by another thread, this
function waits until the mutex is released.

Sometimes we want to give up and get a return value after waiting
for a bounded time.  So introduce aio_context_acquire_timeout(),
which returns ETIMEDOUT if the lock cannot be taken within @t
seconds.

This will be used in the next patch.

Signed-off-by: Zhenyu Ye <yezhenyu2@huawei.com>
---
 include/block/aio.h         |  5 +++++
 include/qemu/thread-posix.h |  1 +
 include/qemu/thread.h       |  1 +
 util/async.c                | 10 ++++++++++
 util/qemu-thread-posix.c    |  6 ++++++
 5 files changed, 23 insertions(+)

diff --git a/include/block/aio.h b/include/block/aio.h
index b2f703fa3f..cb178cdf5a 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -287,6 +287,11 @@ void aio_context_unref(AioContext *ctx);
  */
 void aio_context_acquire(AioContext *ctx);
 
+/* Add timeout to aio_context_acquire().  If the time for obtaining
+ * the lock exceeds @t, return ETIMEDOUT.
+ */
+int aio_context_acquire_timeout(AioContext *ctx, int t);
+
 /* Relinquish ownership of the AioContext. */
 void aio_context_release(AioContext *ctx);
 
diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h
index c903525062..2e62181959 100644
--- a/include/qemu/thread-posix.h
+++ b/include/qemu/thread-posix.h
@@ -9,6 +9,7 @@ typedef QemuMutex QemuRecMutex;
 #define qemu_rec_mutex_lock_impl    qemu_mutex_lock_impl
 #define qemu_rec_mutex_trylock_impl qemu_mutex_trylock_impl
 #define qemu_rec_mutex_unlock qemu_mutex_unlock
+#define qemu_rec_mutex_timed_lock qemu_mutex_timed_lock
 
 struct QemuMutex {
     pthread_mutex_t lock;
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index 4baf4d1715..7a96a5df2a 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -27,6 +27,7 @@ void qemu_mutex_destroy(QemuMutex *mutex);
 int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line);
 void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line);
 void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line);
+int qemu_mutex_timed_lock(QemuMutex *mutex, const struct timespec *tsptr);
 
 typedef void (*QemuMutexLockFunc)(QemuMutex *m, const char *f, int l);
 typedef int (*QemuMutexTrylockFunc)(QemuMutex *m, const char *f, int l);
diff --git a/util/async.c b/util/async.c
index 1319eee3bc..c478d3dbf5 100644
--- a/util/async.c
+++ b/util/async.c
@@ -605,6 +605,16 @@ void aio_context_acquire(AioContext *ctx)
     qemu_rec_mutex_lock(&ctx->lock);
 }
 
+int aio_context_acquire_timeout(AioContext *ctx, int t)
+{
+    struct timespec tout;
+
+    clock_gettime(CLOCK_REALTIME, &tout);
+    tout.tv_sec += t;
+
+    return qemu_rec_mutex_timed_lock(&ctx->lock, &tout);
+}
+
 void aio_context_release(AioContext *ctx)
 {
     qemu_rec_mutex_unlock(&ctx->lock);
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index b4c2359272..49fb98a2e8 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -82,6 +82,12 @@ void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line)
     qemu_mutex_post_lock(mutex, file, line);
 }
 
+int qemu_mutex_timed_lock(QemuMutex *mutex, const struct timespec *tsptr)
+{
+    assert(mutex->initialized);
+    return pthread_mutex_timedlock(&mutex->lock, tsptr);
+}
+
 int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line)
 {
     int err;
-- 
2.22.0.windows.1
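
(Not part of the patch, just background for review.)  The helper above is a
thin wrapper around pthread_mutex_timedlock(), which takes an absolute
CLOCK_REALTIME deadline and fails with ETIMEDOUT once that deadline passes.
A self-contained sketch of just that primitive, outside QEMU and with
invented names (acquire_timeout, holder):

/* Standalone demo of a bounded mutex acquisition, not QEMU code.
 * Build: gcc -o timedlock timedlock.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Same shape as the patch: current CLOCK_REALTIME time plus @t seconds
 * becomes the absolute deadline passed to pthread_mutex_timedlock(). */
static int acquire_timeout(pthread_mutex_t *m, int t)
{
    struct timespec tout;

    clock_gettime(CLOCK_REALTIME, &tout);
    tout.tv_sec += t;

    return pthread_mutex_timedlock(m, &tout);
}

/* Holds the lock for three seconds, standing in for a busy owner. */
static void *holder(void *arg)
{
    pthread_mutex_lock(&lock);
    sleep(3);
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t tid;
    int ret;

    pthread_create(&tid, NULL, holder, NULL);
    sleep(1);                        /* let the holder take the lock first */

    ret = acquire_timeout(&lock, 1); /* gives up after one second */
    printf("acquire_timeout: %s\n", ret ? strerror(ret) : "acquired");
    if (ret == 0) {
        pthread_mutex_unlock(&lock);
    }

    pthread_join(tid, NULL);
    return 0;
}

Running it prints the strerror() text for ETIMEDOUT, since the holder keeps
the lock longer than the one-second deadline.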





* [PATCH v1 2/2] qmp: use aio_context_acquire_timeout replace aio_context_acquire
  2020-08-10 14:52 [PATCH v1 0/2] Add timeout mechanism to qmp actions Zhenyu Ye
  2020-08-10 14:52 ` [PATCH v1 1/2] util: introduce aio_context_acquire_timeout Zhenyu Ye
@ 2020-08-10 14:52 ` Zhenyu Ye
  2020-08-10 15:38 ` [PATCH v1 0/2] Add timeout mechanism to qmp actions Kevin Wolf
  2020-08-12 13:51 ` Stefan Hajnoczi
  3 siblings, 0 replies; 28+ messages in thread
From: Zhenyu Ye @ 2020-08-10 14:52 UTC (permalink / raw)
  To: qemu-devel, qemu-block
  Cc: kwolf, fam, yezhenyu2, armbru, xiexiangyou, mreitz, stefanha, pbonzini

Before performing QMP actions, we need to lock the qemu_global_mutex,
so the QMP actions should not take too long.

Unfortunately, some QMP actions need to acquire an AioContext and
this may take a long time.  So replace aio_context_acquire() with
aio_context_acquire_timeout() and return ETIMEDOUT if the wait
exceeds LOCK_TIMEOUT (set to 3 seconds by default).

Signed-off-by: Zhenyu Ye <yezhenyu2@huawei.com>
---
 block/qapi-sysemu.c |  7 ++++++-
 block/qapi.c        |  6 +++++-
 blockdev.c          | 35 ++++++++++++++++++++++++++++++-----
 include/block/aio.h |  1 +
 4 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/block/qapi-sysemu.c b/block/qapi-sysemu.c
index 8498402ad4..e7cd7d3c70 100644
--- a/block/qapi-sysemu.c
+++ b/block/qapi-sysemu.c
@@ -439,6 +439,7 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp)
     BlockDriverState *bs;
     BlockBackend *blk;
     AioContext *aio_context;
+    int ret;
 
     blk = qmp_get_blk(arg->has_device ? arg->device : NULL,
                       arg->has_id ? arg->id : NULL,
@@ -448,7 +449,11 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp)
     }
 
     aio_context = blk_get_aio_context(blk);
-    aio_context_acquire(aio_context);
+    ret = aio_context_acquire_timeout(aio_context, LOCK_TIMEOUT);
+    if (ret) {
+        error_setg_errno(errp, ret, "acquire aio context failed");
+        return;
+    }
 
     bs = blk_bs(blk);
     if (!bs) {
diff --git a/block/qapi.c b/block/qapi.c
index afd9f3b4a7..ff2454daff 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -271,7 +271,11 @@ void bdrv_query_image_info(BlockDriverState *bs,
     Error *err = NULL;
     ImageInfo *info;
 
-    aio_context_acquire(bdrv_get_aio_context(bs));
+    ret = aio_context_acquire_timeout(bdrv_get_aio_context(bs), LOCK_TIMEOUT);
+    if (ret) {
+        error_setg_errno(errp, ret, "acquire aio context failed");
+        return;
+    }
 
     size = bdrv_getlength(bs);
     if (size < 0) {
diff --git a/blockdev.c b/blockdev.c
index 3848a9c8ab..ce133c89e2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2431,6 +2431,7 @@ void qmp_block_resize(bool has_device, const char *device,
     BlockBackend *blk = NULL;
     BlockDriverState *bs;
     AioContext *aio_context;
+    int ret;
 
     bs = bdrv_lookup_bs(has_device ? device : NULL,
                         has_node_name ? node_name : NULL,
@@ -2441,7 +2442,11 @@ void qmp_block_resize(bool has_device, const char *device,
     }
 
     aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
+    ret = aio_context_acquire_timeout(aio_context, LOCK_TIMEOUT);
+    if (ret) {
+        error_setg_errno(errp, ret, "acquire aio context failed");
+        return;
+    }
 
     if (size < 0) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size");
@@ -3016,7 +3021,11 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     }
 
     aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
+    ret = aio_context_acquire_timeout(aio_context, LOCK_TIMEOUT);
+    if (ret) {
+        error_setg_errno(errp, ret, "acquire aio context failed");
+        return;
+    }
 
     if (!arg->has_mode) {
         arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
@@ -3184,12 +3193,20 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
     /* Honor bdrv_try_set_aio_context() context acquisition requirements. */
     old_context = bdrv_get_aio_context(target_bs);
     aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(old_context);
+    ret = aio_context_acquire_timeout(old_context, LOCK_TIMEOUT);
+    if (ret) {
+        error_setg_errno(errp, ret, "acquire aio context failed");
+        return;
+    }
 
     ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
 
     aio_context_release(old_context);
-    aio_context_acquire(aio_context);
+    ret = aio_context_acquire_timeout(aio_context, LOCK_TIMEOUT);
+    if (ret) {
+        error_setg_errno(errp, ret, "acquire aio context failed");
+        return;
+    }
 
     if (ret < 0) {
         goto out;
@@ -3603,6 +3620,7 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
 {
     BlockJobInfoList *head = NULL, **p_next = &head;
     BlockJob *job;
+    int ret;
 
     for (job = block_job_next(NULL); job; job = block_job_next(job)) {
         BlockJobInfoList *elem;
@@ -3613,7 +3631,14 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
         }
         elem = g_new0(BlockJobInfoList, 1);
         aio_context = blk_get_aio_context(job->blk);
-        aio_context_acquire(aio_context);
+        ret = aio_context_acquire_timeout(aio_context, LOCK_TIMEOUT);
+        if (ret) {
+            g_free(elem);
+            qapi_free_BlockJobInfoList(head);
+            error_setg_errno(errp, ret, "acquire aio context failed");
+            return NULL;
+        }
+
         elem->value = block_job_query(job, errp);
         aio_context_release(aio_context);
         if (!elem->value) {
diff --git a/include/block/aio.h b/include/block/aio.h
index cb178cdf5a..539096f547 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -290,6 +290,7 @@ void aio_context_acquire(AioContext *ctx);
 /* Add timeout to aio_context_acquire().  If the time for obtaining
  * the lock exceeds @t, return ETIMEDOUT.
  */
+#define LOCK_TIMEOUT  3
 int aio_context_acquire_timeout(AioContext *ctx, int t);
 
 /* Relinquish ownership of the AioContext. */
-- 
2.22.0.windows.1
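
(A side note, not part of the patch.)  Every caller above repeats the same
acquire-or-fail dance.  If this approach is pursued, the pattern could be
factored into one helper; a hypothetical sketch, where the name
qmp_acquire_ctx() is invented and the body only uses the series'
aio_context_acquire_timeout()/LOCK_TIMEOUT plus QEMU's error_setg_errno():

/* Hypothetical helper, not in this series: acquire an AioContext with the
 * bounded wait introduced above, or set @errp and report failure.
 * Assumes "block/aio.h" and "qapi/error.h" from the tree.
 */
static bool qmp_acquire_ctx(AioContext *ctx, Error **errp)
{
    int ret = aio_context_acquire_timeout(ctx, LOCK_TIMEOUT);

    if (ret) {
        error_setg_errno(errp, ret, "acquire aio context failed");
        return false;
    }
    return true;
}

Callers would then reduce to "if (!qmp_acquire_ctx(aio_context, errp)) {
return; }" and the timeout policy would live in a single place.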





* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-08-10 14:52 [PATCH v1 0/2] Add timeout mechanism to qmp actions Zhenyu Ye
  2020-08-10 14:52 ` [PATCH v1 1/2] util: introduce aio_context_acquire_timeout Zhenyu Ye
  2020-08-10 14:52 ` [PATCH v1 2/2] qmp: use aio_context_acquire_timeout replace aio_context_acquire Zhenyu Ye
@ 2020-08-10 15:38 ` Kevin Wolf
  2020-08-11 13:54   ` Zhenyu Ye
  2020-08-12 13:51 ` Stefan Hajnoczi
  3 siblings, 1 reply; 28+ messages in thread
From: Kevin Wolf @ 2020-08-10 15:38 UTC (permalink / raw)
  To: Zhenyu Ye
  Cc: fam, qemu-block, armbru, xiexiangyou, qemu-devel, stefanha,
	pbonzini, mreitz

On 10.08.2020 at 16:52, Zhenyu Ye wrote:
> Before doing qmp actions, we need to lock the qemu_global_mutex,
> so the qmp actions should not take too long time.
> 
> Unfortunately, some qmp actions need to acquire aio context and
> this may take a long time.  The vm will soft lockup if this time
> is too long.

Do you have a specific situation in mind where getting the lock of an
AioContext can take a long time? I know that the main thread can
block for considerable time, but QMP commands run in the main thread, so
this patch doesn't change anything for this case. It would be effective
if an iothread blocks, but shouldn't everything running in an iothread
be asynchronous and therefore keep the AioContext lock only for a short
time?

Kevin




* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-08-10 15:38 ` [PATCH v1 0/2] Add timeout mechanism to qmp actions Kevin Wolf
@ 2020-08-11 13:54   ` Zhenyu Ye
  2020-08-21 12:52     ` Stefan Hajnoczi
                       ` (2 more replies)
  0 siblings, 3 replies; 28+ messages in thread
From: Zhenyu Ye @ 2020-08-11 13:54 UTC (permalink / raw)
  To: Kevin Wolf
  Cc: fam, qemu-block, armbru, xiexiangyou, qemu-devel, stefanha,
	pbonzini, mreitz

Hi Kevin,

On 2020/8/10 23:38, Kevin Wolf wrote:
> On 10.08.2020 at 16:52, Zhenyu Ye wrote:
>> Before doing qmp actions, we need to lock the qemu_global_mutex,
>> so the qmp actions should not take too long time.
>>
>> Unfortunately, some qmp actions need to acquire aio context and
>> this may take a long time.  The vm will soft lockup if this time
>> is too long.
> 
> Do you have a specific situation in mind where getting the lock of an
> AioContext can take a long time? I know that the main thread can
> block for considerable time, but QMP commands run in the main thread, so
> this patch doesn't change anything for this case. It would be effective
> if an iothread blocks, but shouldn't everything running in an iothread
> be asynchronous and therefore keep the AioContext lock only for a short
> time?
> 

Theoretically, everything running in an iothread is asynchronous.  However,
some 'asynchronous' actions are not entirely non-blocking, such as
io_submit().  It can block when the iodepth is too big and the I/O pressure
is too high.  Doing QMP actions such as 'info block' at that point may
cause a VM soft lockup.  This series makes these QMP actions safer.

I reproduced the problem as follows:
1. Create a VM with 4 disks, using an iothread.
2. Put pressure on the host CPU.  In my setup, the CPU usage exceeds 95%.
3. Put pressure on the 4 disks in the VM at the same time.  I used fio with
these parameters:

	 fio -rw=randrw -bs=1M -size=1G -iodepth=512 -ioengine=libaio -numjobs=4

4. Run block query actions, for example via virsh:

	virsh qemu-monitor-command [vm name] --hmp info block

Then the VM hits a soft lockup; the calltrace is:

[  192.311393] watchdog: BUG: soft lockup - CPU#1 stuck for 42s! [kworker/1:1:33]
[  192.314241] Kernel panic - not syncing: softlockup: hung tasks
[  192.316370] CPU: 1 PID: 33 Comm: kworker/1:1 Kdump: loaded Tainted: G           OEL    4.19.36+ #16
[  192.318765] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
[  192.325638] Workqueue: events drm_fb_helper_dirty_work
[  192.327238] Call trace:
[  192.331528]  dump_backtrace+0x0/0x198
[  192.332695]  show_stack+0x24/0x30
[  192.334219]  dump_stack+0xa4/0xcc
[  192.335578]  panic+0x12c/0x314
[  192.336565]  watchdog_timer_fn+0x3e4/0x3e8
[  192.339984]  __hrtimer_run_queues+0x114/0x358
[  192.341576]  hrtimer_interrupt+0x104/0x2d8
[  192.343247]  arch_timer_handler_virt+0x38/0x58
[  192.345074]  handle_percpu_devid_irq+0x90/0x248
[  192.347238]  generic_handle_irq+0x34/0x50
[  192.349536]  __handle_domain_irq+0x68/0xc0
[  192.351193]  gic_handle_irq+0x6c/0x150
[  192.352639]  el1_irq+0xb8/0x140
[  192.353855]  vp_notify+0x28/0x38 [virtio_pci]
[  192.355646]  virtqueue_kick+0x3c/0x78 [virtio_ring]
[  192.357539]  virtio_gpu_queue_ctrl_buffer_locked+0x180/0x248 [virtio_gpu]
[  192.359869]  virtio_gpu_queue_ctrl_buffer+0x50/0x78 [virtio_gpu]
[  192.361456]  virtio_gpu_cmd_resource_flush+0x8c/0xb0 [virtio_gpu]
[  192.363422]  virtio_gpu_surface_dirty+0x60/0x110 [virtio_gpu]
[  192.365215]  virtio_gpu_framebuffer_surface_dirty+0x34/0x48 [virtio_gpu]
[  192.367452]  drm_fb_helper_dirty_work+0x178/0x1c0
[  192.368912]  process_one_work+0x1b4/0x3f8
[  192.370192]  worker_thread+0x54/0x470
[  192.371370]  kthread+0x134/0x138
[  192.379241]  ret_from_fork+0x10/0x18
[  192.382688] kernel fault(0x5) notification starting on CPU 1
[  192.385059] kernel fault(0x5) notification finished on CPU 1
[  192.387311] SMP: stopping secondary CPUs
[  192.391024] Kernel Offset: disabled
[  192.392111] CPU features: 0x0,a1806008
[  192.393306] Memory Limit: none
[  192.396701] Starting crashdump kernel...
[  192.397821] Bye!

This problem can be avoided after this series is applied.

Thanks,
Zhenyu




* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-08-10 14:52 [PATCH v1 0/2] Add timeout mechanism to qmp actions Zhenyu Ye
                   ` (2 preceding siblings ...)
  2020-08-10 15:38 ` [PATCH v1 0/2] Add timeout mechanism to qmp actions Kevin Wolf
@ 2020-08-12 13:51 ` Stefan Hajnoczi
  2020-08-13  1:51   ` Zhenyu Ye
  3 siblings, 1 reply; 28+ messages in thread
From: Stefan Hajnoczi @ 2020-08-12 13:51 UTC (permalink / raw)
  To: Zhenyu Ye
  Cc: kwolf, fam, qemu-block, armbru, xiexiangyou, qemu-devel,
	pbonzini, mreitz


On Mon, Aug 10, 2020 at 10:52:44PM +0800, Zhenyu Ye wrote:
> Before doing qmp actions, we need to lock the qemu_global_mutex,
> so the qmp actions should not take too long time.
> 
> Unfortunately, some qmp actions need to acquire aio context and
> this may take a long time.  The vm will soft lockup if this time
> is too long.
> 
> So add a timeout mechanism while doing qmp actions.

aio_context_acquire_timeout() is a workaround for code that blocks the
event loop. Ideally there should be no code that blocks the event loop.

Which cases have you found where the event loop is blocked?

I think they should be investigated and fixed (if possible) before
introducing an API like aio_context_acquire_timeout().

Stefan



* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-08-12 13:51 ` Stefan Hajnoczi
@ 2020-08-13  1:51   ` Zhenyu Ye
  0 siblings, 0 replies; 28+ messages in thread
From: Zhenyu Ye @ 2020-08-13  1:51 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: kwolf, fam, qemu-block, armbru, xiexiangyou, qemu-devel,
	pbonzini, mreitz

Hi Stefan,

On 2020/8/12 21:51, Stefan Hajnoczi wrote:
> On Mon, Aug 10, 2020 at 10:52:44PM +0800, Zhenyu Ye wrote:
>> Before doing qmp actions, we need to lock the qemu_global_mutex,
>> so the qmp actions should not take too long time.
>>
>> Unfortunately, some qmp actions need to acquire aio context and
>> this may take a long time.  The vm will soft lockup if this time
>> is too long.
>>
>> So add a timeout mechanism while doing qmp actions.
> 
> aio_context_acquire_timeout() is a workaround for code that blocks the
> event loop. Ideally there should be no code that blocks the event loop.
> 
> Which cases have you found where the event loop is blocked?
> 

So far I have only found that io_submit() can block when the I/O pressure
is too high; for details, see:

	https://lore.kernel.org/qemu-devel/c6d75e49-3e36-6a76-fdc8-cdf09e7c3393@huawei.com/

io_submit() cannot be guaranteed to be non-blocking at all times.

> I think they should be investigated and fixed (if possible) before
> introducing an API like aio_context_acquire_timeout().
> 

We cannot ensure that everything running in an iothread is non-blocking,
because some actions look asynchronous but can block at times (such as
io_submit()).  Anyway, the _timeout() API makes these QMP commands
(which need to acquire the AioContext) safer.

Thanks,
Zhenyu




* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-08-11 13:54   ` Zhenyu Ye
@ 2020-08-21 12:52     ` Stefan Hajnoczi
  2020-09-14 13:27     ` Stefan Hajnoczi
  2020-09-14 14:42     ` Daniel P. Berrangé
  2 siblings, 0 replies; 28+ messages in thread
From: Stefan Hajnoczi @ 2020-08-21 12:52 UTC (permalink / raw)
  To: Zhenyu Ye
  Cc: Kevin Wolf, fam, qemu-block, armbru, xiexiangyou, qemu-devel,
	pbonzini, mreitz


On Tue, Aug 11, 2020 at 09:54:08PM +0800, Zhenyu Ye wrote:
> Hi Kevin,
> 
> On 2020/8/10 23:38, Kevin Wolf wrote:
> > On 10.08.2020 at 16:52, Zhenyu Ye wrote:
> >> Before doing qmp actions, we need to lock the qemu_global_mutex,
> >> so the qmp actions should not take too long time.
> >>
> >> Unfortunately, some qmp actions need to acquire aio context and
> >> this may take a long time.  The vm will soft lockup if this time
> >> is too long.
> > 
> > Do you have a specific situation in mind where getting the lock of an
> > AioContext can take a long time? I know that the main thread can
> > block for considerable time, but QMP commands run in the main thread, so
> > this patch doesn't change anything for this case. It would be effective
> > if an iothread blocks, but shouldn't everything running in an iothread
> > be asynchronous and therefore keep the AioContext lock only for a short
> > time?
> > 
> 
> Theoretically, everything running in an iothread is asynchronous. However,
> some 'asynchronous' actions are not non-blocking entirely, such as
> io_submit().  This will block while the iodepth is too big and I/O pressure
> is too high.  If we do some qmp actions, such as 'info block', at this time,
> may cause vm soft lockup.  This series can make these qmp actions safer.
> 
> I constructed the scene as follow:
> 1. create a vm with 4 disks, using iothread.
> 2. add press to the CPU on the host.  In my scene, the CPU usage exceeds 95%.
> 3. add press to the 4 disks in the vm at the same time.  I used the fio and
> some parameters are:
> 
> 	 fio -rw=randrw -bs=1M -size=1G -iodepth=512 -ioengine=libaio -numjobs=4
> 
> 4. do block query actions, for example, by virsh:
> 
> 	virsh qemu-monitor-command [vm name] --hmp info block

Great, thanks for describing a reproducer.

Do you also have a QEMU backtrace?

I'm curious whether io_submit(2) is blocking for 40+ seconds or whether
the softlockup is caused by QEMU code waiting to drain in-flight I/O
requests with the global mutex held.

I'll try to reproduce this here and start with:

  host# perf record -a -e syscalls:sys_enter_io_submit \
                       -e syscalls:sys_exit_io_submit \
                       -e kvm:kvm_exit \
                       -e kvm:kvm_entry

This will allow us to check the io_submit() and vmexit->vmenter timings.

I want to understand this better to be sure that the timeout is really
the only solution before merging this patch. My concern is that relying
on timeouts will hide QEMU bugs that could be fixed instead.

> Then the vm will soft lockup, the calltrace is:
> 
> [  192.311393] watchdog: BUG: soft lockup - CPU#1 stuck for 42s! [kworker/1:1:33]
> [  192.314241] Kernel panic - not syncing: softlockup: hung tasks
> [  192.316370] CPU: 1 PID: 33 Comm: kworker/1:1 Kdump: loaded Tainted: G           OEL    4.19.36+ #16
> [  192.318765] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
> [  192.325638] Workqueue: events drm_fb_helper_dirty_work
> [  192.327238] Call trace:
> [  192.331528]  dump_backtrace+0x0/0x198
> [  192.332695]  show_stack+0x24/0x30
> [  192.334219]  dump_stack+0xa4/0xcc
> [  192.335578]  panic+0x12c/0x314
> [  192.336565]  watchdog_timer_fn+0x3e4/0x3e8
> [  192.339984]  __hrtimer_run_queues+0x114/0x358
> [  192.341576]  hrtimer_interrupt+0x104/0x2d8
> [  192.343247]  arch_timer_handler_virt+0x38/0x58
> [  192.345074]  handle_percpu_devid_irq+0x90/0x248
> [  192.347238]  generic_handle_irq+0x34/0x50
> [  192.349536]  __handle_domain_irq+0x68/0xc0
> [  192.351193]  gic_handle_irq+0x6c/0x150
> [  192.352639]  el1_irq+0xb8/0x140
> [  192.353855]  vp_notify+0x28/0x38 [virtio_pci]
> [  192.355646]  virtqueue_kick+0x3c/0x78 [virtio_ring]
> [  192.357539]  virtio_gpu_queue_ctrl_buffer_locked+0x180/0x248 [virtio_gpu]
> [  192.359869]  virtio_gpu_queue_ctrl_buffer+0x50/0x78 [virtio_gpu]
> [  192.361456]  virtio_gpu_cmd_resource_flush+0x8c/0xb0 [virtio_gpu]
> [  192.363422]  virtio_gpu_surface_dirty+0x60/0x110 [virtio_gpu]
> [  192.365215]  virtio_gpu_framebuffer_surface_dirty+0x34/0x48 [virtio_gpu]
> [  192.367452]  drm_fb_helper_dirty_work+0x178/0x1c0
> [  192.368912]  process_one_work+0x1b4/0x3f8
> [  192.370192]  worker_thread+0x54/0x470
> [  192.371370]  kthread+0x134/0x138
> [  192.379241]  ret_from_fork+0x10/0x18
> [  192.382688] kernel fault(0x5) notification starting on CPU 1
> [  192.385059] kernel fault(0x5) notification finished on CPU 1
> [  192.387311] SMP: stopping secondary CPUs
> [  192.391024] Kernel Offset: disabled
> [  192.392111] CPU features: 0x0,a1806008
> [  192.393306] Memory Limit: none
> [  192.396701] Starting crashdump kernel...
> [  192.397821] Bye!
> 
> This problem can be avoided after this series applied.
> 
> Thanks,
> Zhenyu
> 



* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-08-11 13:54   ` Zhenyu Ye
  2020-08-21 12:52     ` Stefan Hajnoczi
@ 2020-09-14 13:27     ` Stefan Hajnoczi
  2020-09-17  7:36       ` Zhenyu Ye
  2020-09-14 14:42     ` Daniel P. Berrangé
  2 siblings, 1 reply; 28+ messages in thread
From: Stefan Hajnoczi @ 2020-09-14 13:27 UTC (permalink / raw)
  To: Zhenyu Ye
  Cc: Kevin Wolf, fam, qemu-block, armbru, xiexiangyou, qemu-devel,
	pbonzini, mreitz


On Tue, Aug 11, 2020 at 09:54:08PM +0800, Zhenyu Ye wrote:
> Hi Kevin,
> 
> On 2020/8/10 23:38, Kevin Wolf wrote:
> > On 10.08.2020 at 16:52, Zhenyu Ye wrote:
> >> Before doing qmp actions, we need to lock the qemu_global_mutex,
> >> so the qmp actions should not take too long time.
> >>
> >> Unfortunately, some qmp actions need to acquire aio context and
> >> this may take a long time.  The vm will soft lockup if this time
> >> is too long.
> > 
> > Do you have a specific situation in mind where getting the lock of an
> > AioContext can take a long time? I know that the main thread can
> > block for considerable time, but QMP commands run in the main thread, so
> > this patch doesn't change anything for this case. It would be effective
> > if an iothread blocks, but shouldn't everything running in an iothread
> > be asynchronous and therefore keep the AioContext lock only for a short
> > time?
> > 
> 
> Theoretically, everything running in an iothread is asynchronous. However,
> some 'asynchronous' actions are not non-blocking entirely, such as
> io_submit().  This will block while the iodepth is too big and I/O pressure
> is too high.  If we do some qmp actions, such as 'info block', at this time,
> may cause vm soft lockup.  This series can make these qmp actions safer.
> 
> I constructed the scene as follow:
> 1. create a vm with 4 disks, using iothread.
> 2. add press to the CPU on the host.  In my scene, the CPU usage exceeds 95%.
> 3. add press to the 4 disks in the vm at the same time.  I used the fio and
> some parameters are:
> 
> 	 fio -rw=randrw -bs=1M -size=1G -iodepth=512 -ioengine=libaio -numjobs=4
> 
> 4. do block query actions, for example, by virsh:
> 
> 	virsh qemu-monitor-command [vm name] --hmp info block
> 
> Then the vm will soft lockup, the calltrace is:
> 
> [  192.311393] watchdog: BUG: soft lockup - CPU#1 stuck for 42s! [kworker/1:1:33]

Hi,
Sorry I haven't had time to investigate this myself yet.

Do you also have a QEMU backtrace when the hang occurs?

Let's find out if QEMU is stuck in the io_submit(2) syscall or whether
there's an issue in QEMU itself that causes the softlockup (for example,
aio_poll() with the global mutex held).

Stefan



* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-08-11 13:54   ` Zhenyu Ye
  2020-08-21 12:52     ` Stefan Hajnoczi
  2020-09-14 13:27     ` Stefan Hajnoczi
@ 2020-09-14 14:42     ` Daniel P. Berrangé
  2020-09-17  8:12       ` Zhenyu Ye
  2 siblings, 1 reply; 28+ messages in thread
From: Daniel P. Berrangé @ 2020-09-14 14:42 UTC (permalink / raw)
  To: Zhenyu Ye
  Cc: Kevin Wolf, fam, qemu-block, qemu-devel, xiexiangyou, armbru,
	stefanha, pbonzini, mreitz

On Tue, Aug 11, 2020 at 09:54:08PM +0800, Zhenyu Ye wrote:
> Hi Kevin,
> 
> On 2020/8/10 23:38, Kevin Wolf wrote:
> > On 10.08.2020 at 16:52, Zhenyu Ye wrote:
> >> Before doing qmp actions, we need to lock the qemu_global_mutex,
> >> so the qmp actions should not take too long time.
> >>
> >> Unfortunately, some qmp actions need to acquire aio context and
> >> this may take a long time.  The vm will soft lockup if this time
> >> is too long.
> > 
> > Do you have a specific situation in mind where getting the lock of an
> > AioContext can take a long time? I know that the main thread can
> > block for considerable time, but QMP commands run in the main thread, so
> > this patch doesn't change anything for this case. It would be effective
> > if an iothread blocks, but shouldn't everything running in an iothread
> > be asynchronous and therefore keep the AioContext lock only for a short
> > time?
> > 
> 
> Theoretically, everything running in an iothread is asynchronous. However,
> some 'asynchronous' actions are not non-blocking entirely, such as
> io_submit().  This will block while the iodepth is too big and I/O pressure
> is too high.  If we do some qmp actions, such as 'info block', at this time,
> may cause vm soft lockup.  This series can make these qmp actions safer.
> 
> I constructed the scene as follow:
> 1. create a vm with 4 disks, using iothread.
> 2. add press to the CPU on the host.  In my scene, the CPU usage exceeds 95%.
> 3. add press to the 4 disks in the vm at the same time.  I used the fio and
> some parameters are:
> 
> 	 fio -rw=randrw -bs=1M -size=1G -iodepth=512 -ioengine=libaio -numjobs=4
> 
> 4. do block query actions, for example, by virsh:
> 
> 	virsh qemu-monitor-command [vm name] --hmp info block
> 
> Then the vm will soft lockup, the calltrace is:

[snip]

> This problem can be avoided after this series applied.

At what cost though?  With this timeout, QEMU is going to start
reporting bogus failures for various QMP commands when running
under high load, even if those commands would actually run
successfully.  This will turn into an error report from libvirt
which will in turn probably cause an error in the mgmt application
using libvirt, and in turn could break the user's automation.


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|




* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-09-14 13:27     ` Stefan Hajnoczi
@ 2020-09-17  7:36       ` Zhenyu Ye
  2020-09-17 10:10         ` Fam Zheng
  2020-09-17 15:44         ` Stefan Hajnoczi
  0 siblings, 2 replies; 28+ messages in thread
From: Zhenyu Ye @ 2020-09-17  7:36 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Kevin Wolf, fam, qemu-block, armbru, xiexiangyou, qemu-devel,
	pbonzini, mreitz

Hi Stefan,

On 2020/9/14 21:27, Stefan Hajnoczi wrote:
>>
>> Theoretically, everything running in an iothread is asynchronous. However,
>> some 'asynchronous' actions are not non-blocking entirely, such as
>> io_submit().  This will block while the iodepth is too big and I/O pressure
>> is too high.  If we do some qmp actions, such as 'info block', at this time,
>> may cause vm soft lockup.  This series can make these qmp actions safer.
>>
>> I constructed the scene as follow:
>> 1. create a vm with 4 disks, using iothread.
>> 2. add press to the CPU on the host.  In my scene, the CPU usage exceeds 95%.
>> 3. add press to the 4 disks in the vm at the same time.  I used the fio and
>> some parameters are:
>>
>> 	 fio -rw=randrw -bs=1M -size=1G -iodepth=512 -ioengine=libaio -numjobs=4
>>
>> 4. do block query actions, for example, by virsh:
>>
>> 	virsh qemu-monitor-command [vm name] --hmp info block
>>
>> Then the vm will soft lockup, the calltrace is:
>>
>> [  192.311393] watchdog: BUG: soft lockup - CPU#1 stuck for 42s! [kworker/1:1:33]
> 
> Hi,
> Sorry I haven't had time to investigate this myself yet.
> 
> Do you also have a QEMU backtrace when the hang occurs?
> 
> Let's find out if QEMU is stuck in the io_submit(2) syscall or whether
> there's an issue in QEMU itself that causes the softlockup (for example,
> aio_poll() with the global mutex held).

Sorry for the delay.

I traced the io_submit() within the guest. The maximum execution time exceeds 16s:

	guest# perf trace -p `pidof -s fio` --duration 1
	    14.808 (3843.082 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	  3861.336 (11470.608 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	 15341.998 (479.283 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	 15831.380 (3704.997 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	 19547.869 (3412.839 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	 22966.953 (1080.854 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	 24062.689 (6939.813 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	 31019.219 (532.147 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	 31556.610 (3469.920 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	 35038.885 (9007.175 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	 44053.578 (16006.405 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	 60068.944 (3068.748 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	 63138.474 (13012.499 ms): io_getevents(ctx_id: 281472924459008, min_nr: 511, nr: 511, events: 0x19a83150) = 511
	 76191.073 (2872.657 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
	...

And on the host, the perf data for sys_enter_io_submit() is:

	Samples: 3K of event 'syscalls:sys_enter_io_submit', Event count (approx.): 3150
	Children      Self  Trace output
	-   66.70%    66.70%  ctx_id: 0xffff9c044000, nr: 0x00000001, iocbpp: 0xffff9f7fad28
	0xffffae7f871c
	0xffffae8a27c4
	qemu_thread_start
	iothread_run
	aio_poll
	aio_dispatch_ready_handlers
	aio_dispatch_handler
	virtio_queue_host_notifier_aio_read
	virtio_queue_notify_aio_vq
	virtio_blk_data_plane_handle_output
	virtio_blk_handle_vq
	blk_io_unplug
	bdrv_io_unplug
	bdrv_io_unplug
	raw_aio_unplug
	laio_io_unplug
	syscall


When the hang occurs, QEMU is blocked at:

	#0  0x0000ffff95762b64 in ?? () from target:/usr/lib64/libpthread.so.0
	#1  0x0000ffff9575bd88 in pthread_mutex_lock () from target:/usr/lib64/libpthread.so.0
	#2  0x0000aaaabb1f5948 in qemu_mutex_lock_impl (mutex=0xaaaacc8e1860,
	    file=0xaaaabb4e1bd0 "/Images/eillon/CODE/5-opensource/qemu/util/async.c", line=605)
	#3  0x0000aaaabb20acd4 in aio_context_acquire (ctx=0xaaaacc8e1800)
	#4  0x0000aaaabb105e90 in bdrv_query_image_info (bs=0xaaaacc934620,
	    p_info=0xaaaaccc41e18, errp=0xffffca669118)
	#5  0x0000aaaabb105968 in bdrv_block_device_info (blk=0xaaaacdca19f0, bs=0xaaaacc934620,
	    flat=false, errp=0xffffca6692b8)
	#6  0x0000aaaabb1063dc in bdrv_query_info (blk=0xaaaacdca19f0, p_info=0xaaaacd29c9a8,
	    errp=0xffffca6692b8)
	#7  0x0000aaaabb106c14 in qmp_query_block (errp=0x0)
	#8  0x0000aaaabacb8e6c in hmp_info_block (mon=0xffffca6693d0, qdict=0xaaaacd089790)
	#9  0x0000aaaabb0068f0 in handle_hmp_command (mon=0xffffca6693d0,
	    cmdline=0xaaaacc9013ca "")
	#10 0x0000aaaabaab5034 in qmp_human_monitor_command (
	    command_line=0xaaaacc9013c0 "info block", has_cpu_index=false, cpu_index=0,
	    errp=0xffffca6694d8)
	#11 0x0000aaaabb03c720 in qmp_marshal_human_monitor_command (args=0xffff70008070,
	    ret=0xffffca669570, errp=0xffffca669568) at qapi/qapi-commands-misc.c:653
	#12 0x0000aaaabb18fdbc in qmp_dispatch (cmds=0xaaaabbb197c0 <qmp_commands>,
	    request=0xffff70005cc0, allow_oob=false)
	#13 0x0000aaaabb003074 in monitor_qmp_dispatch (mon=0xaaaacc962320, req=0xffff70005cc0)
	#14 0x0000aaaabb003488 in monitor_qmp_bh_dispatcher (data=0x0)
	#15 0x0000aaaabb209d44 in aio_bh_call (bh=0xaaaacc5f0810)
	#16 0x0000aaaabb209e50 in aio_bh_poll (ctx=0xaaaacc5ef320)
	#17 0x0000aaaabb1f1494 in aio_dispatch (ctx=0xaaaacc5ef320)
	#18 0x0000aaaabb20a330 in aio_ctx_dispatch (source=0xaaaacc5ef320, callback=0x0,
	    user_data=0x0)
	#19 0x0000ffff95f00a7c in g_main_context_dispatch ()
	   from target:/usr/lib64/libglib-2.0.so.0
	#20 0x0000aaaabb2128e8 in glib_pollfds_poll ()
	#21 0x0000aaaabb212970 in os_host_main_loop_wait (timeout=64805420)
	#22 0x0000aaaabb212a90 in main_loop_wait (nonblocking=0)
	#23 0x0000aaaabaad63d0 in qemu_main_loop ()
	#24 0x0000aaaabb188b54 in main (argc=117, argv=0xffffca669a68, envp=0xffffca669e18)

And the kernel backtrace of the QEMU process is:
	[<0>] __switch_to+0xdc/0x140
	[<0>] futex_wait_queue_me+0xd4/0x158
	[<0>] futex_wait+0xf4/0x230
	[<0>] do_futex+0x46c/0x608
	[<0>] __arm64_sys_futex+0x13c/0x188
	[<0>] el0_svc_common+0x80/0x1b8
	[<0>] el0_svc_handler+0x38/0x78
	[<0>] el0_svc+0x10/0x14
	[<0>] 0xffffffffffffffff


Thanks,
Zhenyu



* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-09-14 14:42     ` Daniel P. Berrangé
@ 2020-09-17  8:12       ` Zhenyu Ye
  0 siblings, 0 replies; 28+ messages in thread
From: Zhenyu Ye @ 2020-09-17  8:12 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, fam, qemu-block, qemu-devel, xiexiangyou, armbru,
	stefanha, pbonzini, mreitz

Hi Daniel,

On 2020/9/14 22:42, Daniel P. Berrangé wrote:
> On Tue, Aug 11, 2020 at 09:54:08PM +0800, Zhenyu Ye wrote:
>> Hi Kevin,
>>
>> On 2020/8/10 23:38, Kevin Wolf wrote:
>>> On 10.08.2020 at 16:52, Zhenyu Ye wrote:
>>>> Before doing qmp actions, we need to lock the qemu_global_mutex,
>>>> so the qmp actions should not take too long time.
>>>>
>>>> Unfortunately, some qmp actions need to acquire aio context and
>>>> this may take a long time.  The vm will soft lockup if this time
>>>> is too long.
>>>
>>> Do you have a specific situation in mind where getting the lock of an
>>> AioContext can take a long time? I know that the main thread can
>>> block for considerable time, but QMP commands run in the main thread, so
>>> this patch doesn't change anything for this case. It would be effective
>>> if an iothread blocks, but shouldn't everything running in an iothread
>>> be asynchronous and therefore keep the AioContext lock only for a short
>>> time?
>>>
>>
>> Theoretically, everything running in an iothread is asynchronous. However,
>> some 'asynchronous' actions are not non-blocking entirely, such as
>> io_submit().  This will block while the iodepth is too big and I/O pressure
>> is too high.  If we do some qmp actions, such as 'info block', at this time,
>> may cause vm soft lockup.  This series can make these qmp actions safer.
>>
>> I constructed the scene as follow:
>> 1. create a vm with 4 disks, using iothread.
>> 2. add press to the CPU on the host.  In my scene, the CPU usage exceeds 95%.
>> 3. add press to the 4 disks in the vm at the same time.  I used the fio and
>> some parameters are:
>>
>> 	 fio -rw=randrw -bs=1M -size=1G -iodepth=512 -ioengine=libaio -numjobs=4
>>
>> 4. do block query actions, for example, by virsh:
>>
>> 	virsh qemu-monitor-command [vm name] --hmp info block
>>
>> Then the vm will soft lockup, the calltrace is:
> 
> [snip]
> 
>> This problem can be avoided after this series applied.
> 
> At what cost though ?   With this timeout, QEMU is going to start
> reporting bogus failures for various QMP commands when running
> under high load, even if those commands would actually run
> successfully.  This will turn into an error report from libvirt
> which will in turn probably cause an error in the mgmt application
> using libvirt, and in turn could break the user's automation.
> 

I think it's worth reporting an error to avoid the VM soft lockup.
The VM may even crash if kernel.softlockup_panic is configured!

We can increase the timeout value (close to the VM's soft-lockup
threshold) to avoid unnecessary errors.

Thanks,
Zhenyu



* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-09-17  7:36       ` Zhenyu Ye
@ 2020-09-17 10:10         ` Fam Zheng
  2020-09-17 15:44         ` Stefan Hajnoczi
  1 sibling, 0 replies; 28+ messages in thread
From: Fam Zheng @ 2020-09-17 10:10 UTC (permalink / raw)
  To: Zhenyu Ye
  Cc: Kevin Wolf, qemu-block, qemu-devel, xiexiangyou, armbru,
	Stefan Hajnoczi, pbonzini, mreitz

On 2020-09-17 15:36, Zhenyu Ye wrote:
> Hi Stefan,
> 
> On 2020/9/14 21:27, Stefan Hajnoczi wrote:
> >>
> >> Theoretically, everything running in an iothread is asynchronous. However,
> >> some 'asynchronous' actions are not non-blocking entirely, such as
> >> io_submit().  This will block while the iodepth is too big and I/O pressure
> >> is too high.  If we do some qmp actions, such as 'info block', at this time,
> >> may cause vm soft lockup.  This series can make these qmp actions safer.
> >>
> >> I constructed the scene as follow:
> >> 1. create a vm with 4 disks, using iothread.
> >> 2. add press to the CPU on the host.  In my scene, the CPU usage exceeds 95%.
> >> 3. add press to the 4 disks in the vm at the same time.  I used the fio and
> >> some parameters are:
> >>
> >> 	 fio -rw=randrw -bs=1M -size=1G -iodepth=512 -ioengine=libaio -numjobs=4
> >>
> >> 4. do block query actions, for example, by virsh:
> >>
> >> 	virsh qemu-monitor-command [vm name] --hmp info block
> >>
> >> Then the vm will soft lockup, the calltrace is:
> >>
> >> [  192.311393] watchdog: BUG: soft lockup - CPU#1 stuck for 42s! [kworker/1:1:33]
> > 
> > Hi,
> > Sorry I haven't had time to investigate this myself yet.
> > 
> > Do you also have a QEMU backtrace when the hang occurs?
> > 
> > Let's find out if QEMU is stuck in the io_submit(2) syscall or whether
> > there's an issue in QEMU itself that causes the softlockup (for example,
> > aio_poll() with the global mutex held).
> 
> Sorry for the delay.
> 
> I traced the io_submit() within the guest. The maximum execution time exceeds 16s:
> 
> 	guest# perf trace -p `pidof -s fio` --duration 1
> 	    14.808 (3843.082 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	  3861.336 (11470.608 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	 15341.998 (479.283 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	 15831.380 (3704.997 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	 19547.869 (3412.839 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	 22966.953 (1080.854 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	 24062.689 (6939.813 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	 31019.219 (532.147 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	 31556.610 (3469.920 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	 35038.885 (9007.175 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	 44053.578 (16006.405 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	 60068.944 (3068.748 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	 63138.474 (13012.499 ms): io_getevents(ctx_id: 281472924459008, min_nr: 511, nr: 511, events: 0x19a83150) = 511
> 	 76191.073 (2872.657 ms): io_submit(ctx_id: 281472924459008, nr: 1, iocbpp: 0x19a87160          ) = 1
> 	...
> 
> And in the host, the information of sys_enter_io_submit() is:
> 
> 	Samples: 3K of event 'syscalls:sys_enter_io_submit', Event count (approx.): 3150
> 	Children      Self  Trace output
> 	-   66.70%    66.70%  ctx_id: 0xffff9c044000, nr: 0x00000001, iocbpp: 0xffff9f7fad28
> 	0xffffae7f871c
> 	0xffffae8a27c4
> 	qemu_thread_start
> 	iothread_run
> 	aio_poll
> 	aio_dispatch_ready_handlers
> 	aio_dispatch_handler
> 	virtio_queue_host_notifier_aio_read
> 	virtio_queue_notify_aio_vq
> 	virtio_blk_data_plane_handle_output
> 	virtio_blk_handle_vq
> 	blk_io_unplug
> 	bdrv_io_unplug
> 	bdrv_io_unplug
> 	raw_aio_unplug
> 	laio_io_unplug
> 	syscall
> 
> 
> When the hang occurs, the QEMU is blocked at:
> 
> 	#0  0x0000ffff95762b64 in ?? () from target:/usr/lib64/libpthread.so.0
> 	#1  0x0000ffff9575bd88 in pthread_mutex_lock () from target:/usr/lib64/libpthread.so.0
> 	#2  0x0000aaaabb1f5948 in qemu_mutex_lock_impl (mutex=0xaaaacc8e1860,
> 	    file=0xaaaabb4e1bd0 "/Images/eillon/CODE/5-opensource/qemu/util/async.c", line=605)
> 	#3  0x0000aaaabb20acd4 in aio_context_acquire (ctx=0xaaaacc8e1800)
> 	#4  0x0000aaaabb105e90 in bdrv_query_image_info (bs=0xaaaacc934620,
> 	    p_info=0xaaaaccc41e18, errp=0xffffca669118)
> 	#5  0x0000aaaabb105968 in bdrv_block_device_info (blk=0xaaaacdca19f0, bs=0xaaaacc934620,
> 	    flat=false, errp=0xffffca6692b8)
> 	#6  0x0000aaaabb1063dc in bdrv_query_info (blk=0xaaaacdca19f0, p_info=0xaaaacd29c9a8,
> 	    errp=0xffffca6692b8)
> 	#7  0x0000aaaabb106c14 in qmp_query_block (errp=0x0)
> 	#8  0x0000aaaabacb8e6c in hmp_info_block (mon=0xffffca6693d0, qdict=0xaaaacd089790)
> 	#9  0x0000aaaabb0068f0 in handle_hmp_command (mon=0xffffca6693d0,
> 	    cmdline=0xaaaacc9013ca "")
> 	#10 0x0000aaaabaab5034 in qmp_human_monitor_command (
> 	    command_line=0xaaaacc9013c0 "info block", has_cpu_index=false, cpu_index=0,
> 	    errp=0xffffca6694d8)
> 	#11 0x0000aaaabb03c720 in qmp_marshal_human_monitor_command (args=0xffff70008070,
> 	    ret=0xffffca669570, errp=0xffffca669568) at qapi/qapi-commands-misc.c:653
> 	#12 0x0000aaaabb18fdbc in qmp_dispatch (cmds=0xaaaabbb197c0 <qmp_commands>,
> 	    request=0xffff70005cc0, allow_oob=false)
> 	#13 0x0000aaaabb003074 in monitor_qmp_dispatch (mon=0xaaaacc962320, req=0xffff70005cc0)
> 	#14 0x0000aaaabb003488 in monitor_qmp_bh_dispatcher (data=0x0)
> 	#15 0x0000aaaabb209d44 in aio_bh_call (bh=0xaaaacc5f0810)
> 	#16 0x0000aaaabb209e50 in aio_bh_poll (ctx=0xaaaacc5ef320)
> 	#17 0x0000aaaabb1f1494 in aio_dispatch (ctx=0xaaaacc5ef320)
> 	#18 0x0000aaaabb20a330 in aio_ctx_dispatch (source=0xaaaacc5ef320, callback=0x0,
> 	    user_data=0x0)
> 	#19 0x0000ffff95f00a7c in g_main_context_dispatch ()
> 	   from target:/usr/lib64/libglib-2.0.so.0
> 	#20 0x0000aaaabb2128e8 in glib_pollfds_poll ()
> 	#21 0x0000aaaabb212970 in os_host_main_loop_wait (timeout=64805420)
> 	#22 0x0000aaaabb212a90 in main_loop_wait (nonblocking=0)
> 	#23 0x0000aaaabaad63d0 in qemu_main_loop ()
> 	#24 0x0000aaaabb188b54 in main (argc=117, argv=0xffffca669a68, envp=0xffffca669e18)
> 
> And the QEMU process backtrace in kernel is:
> 	[<0>] __switch_to+0xdc/0x140
> 	[<0>] futex_wait_queue_me+0xd4/0x158
> 	[<0>] futex_wait+0xf4/0x230
> 	[<0>] do_futex+0x46c/0x608
> 	[<0>] __arm64_sys_futex+0x13c/0x188
> 	[<0>] el0_svc_common+0x80/0x1b8
> 	[<0>] el0_svc_handler+0x38/0x78
> 	[<0>] el0_svc+0x10/0x14
> 	[<0>] 0xffffffffffffffff

Hi Zhenyu,

Did you by any chance capture the kernel stack where io_submit is stuck?

Thanks,
Fam



* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-09-17  7:36       ` Zhenyu Ye
  2020-09-17 10:10         ` Fam Zheng
@ 2020-09-17 15:44         ` Stefan Hajnoczi
  2020-09-17 16:01           ` Fam Zheng
  1 sibling, 1 reply; 28+ messages in thread
From: Stefan Hajnoczi @ 2020-09-17 15:44 UTC (permalink / raw)
  To: Zhenyu Ye
  Cc: Kevin Wolf, fam, qemu-block, armbru, xiexiangyou, qemu-devel,
	pbonzini, mreitz


On Thu, Sep 17, 2020 at 03:36:57PM +0800, Zhenyu Ye wrote:
> When the hang occurs, the QEMU is blocked at:
> 
> 	#0  0x0000ffff95762b64 in ?? () from target:/usr/lib64/libpthread.so.0
> 	#1  0x0000ffff9575bd88 in pthread_mutex_lock () from target:/usr/lib64/libpthread.so.0
> 	#2  0x0000aaaabb1f5948 in qemu_mutex_lock_impl (mutex=0xaaaacc8e1860,
> 	    file=0xaaaabb4e1bd0 "/Images/eillon/CODE/5-opensource/qemu/util/async.c", line=605)
> 	#3  0x0000aaaabb20acd4 in aio_context_acquire (ctx=0xaaaacc8e1800)
> 	#4  0x0000aaaabb105e90 in bdrv_query_image_info (bs=0xaaaacc934620,
> 	    p_info=0xaaaaccc41e18, errp=0xffffca669118)
> 	#5  0x0000aaaabb105968 in bdrv_block_device_info (blk=0xaaaacdca19f0, bs=0xaaaacc934620,
> 	    flat=false, errp=0xffffca6692b8)
> 	#6  0x0000aaaabb1063dc in bdrv_query_info (blk=0xaaaacdca19f0, p_info=0xaaaacd29c9a8,
> 	    errp=0xffffca6692b8)
> 	#7  0x0000aaaabb106c14 in qmp_query_block (errp=0x0)
> 	#8  0x0000aaaabacb8e6c in hmp_info_block (mon=0xffffca6693d0, qdict=0xaaaacd089790)

Great, this shows that the main loop thread is stuck waiting for the
AioContext lock.

Please post backtraces from all QEMU threads ((gdb) thread apply all bt)
so we can figure out which thread is holding up the main loop.

Thanks,
Stefan



* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-09-17 15:44         ` Stefan Hajnoczi
@ 2020-09-17 16:01           ` Fam Zheng
  2020-09-18 11:23             ` Zhenyu Ye
  0 siblings, 1 reply; 28+ messages in thread
From: Fam Zheng @ 2020-09-17 16:01 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Kevin Wolf, qemu-block, Zhenyu Ye, qemu-devel, xiexiangyou,
	armbru, pbonzini, mreitz

On 2020-09-17 16:44, Stefan Hajnoczi wrote:
> On Thu, Sep 17, 2020 at 03:36:57PM +0800, Zhenyu Ye wrote:
> > When the hang occurs, the QEMU is blocked at:
> > 
> > 	#0  0x0000ffff95762b64 in ?? () from target:/usr/lib64/libpthread.so.0
> > 	#1  0x0000ffff9575bd88 in pthread_mutex_lock () from target:/usr/lib64/libpthread.so.0
> > 	#2  0x0000aaaabb1f5948 in qemu_mutex_lock_impl (mutex=0xaaaacc8e1860,
> > 	    file=0xaaaabb4e1bd0 "/Images/eillon/CODE/5-opensource/qemu/util/async.c", line=605)
> > 	#3  0x0000aaaabb20acd4 in aio_context_acquire (ctx=0xaaaacc8e1800)
> > 	#4  0x0000aaaabb105e90 in bdrv_query_image_info (bs=0xaaaacc934620,
> > 	    p_info=0xaaaaccc41e18, errp=0xffffca669118)
> > 	#5  0x0000aaaabb105968 in bdrv_block_device_info (blk=0xaaaacdca19f0, bs=0xaaaacc934620,
> > 	    flat=false, errp=0xffffca6692b8)
> > 	#6  0x0000aaaabb1063dc in bdrv_query_info (blk=0xaaaacdca19f0, p_info=0xaaaacd29c9a8,
> > 	    errp=0xffffca6692b8)
> > 	#7  0x0000aaaabb106c14 in qmp_query_block (errp=0x0)
> > 	#8  0x0000aaaabacb8e6c in hmp_info_block (mon=0xffffca6693d0, qdict=0xaaaacd089790)
> 
> Great, this shows that the main loop thread is stuck waiting for the
> AioContext lock.
> 
> Please post backtraces from all QEMU threads ((gdb) thread apply all bt)
> so we can figure out which thread is holding up the main loop.

I think that is reflected in the perf backtrace posted by Zhenyu already:

And in the host, the information of sys_enter_io_submit() is:

Samples: 3K of event 'syscalls:sys_enter_io_submit', Event count
(approx.): 3150
   Children      Self  Trace output
   -   66.70%    66.70%  ctx_id: 0xffff9c044000,
   nr: 0x00000001, iocbpp: 0xffff9f7fad28
   0xffffae7f871c
   0xffffae8a27c4
   qemu_thread_start
   iothread_run
   aio_poll
   aio_dispatch_ready_handlers
   aio_dispatch_handler
   virtio_queue_host_notifier_aio_read
   virtio_queue_notify_aio_vq
   virtio_blk_data_plane_handle_output
   virtio_blk_handle_vq
   blk_io_unplug
   bdrv_io_unplug
   bdrv_io_unplug
   raw_aio_unplug
   laio_io_unplug
   syscall

So the iothread is blocked by a slow io_submit holding the AioContext
lock.

It would be interesting to know what in the kernel is blocking io_submit()
from returning.

Fam
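
To visualize the chain described above, here is a toy model with invented
names, written against plain pthreads rather than QEMU's real locks: one
thread stands in for the iothread stuck in a slow io_submit() while holding
the AioContext lock, one for the main loop running a QMP command under the
global mutex, and one for a vCPU thread that then stalls on the global
mutex, which is what the guest reports as a soft lockup.

/* Toy model of the lock chain, not QEMU code; all names are invented.
 * Build: gcc -o lockchain lockchain.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER; /* "BQL" */
static pthread_mutex_t aio_ctx_lock = PTHREAD_MUTEX_INITIALIZER; /* AioContext */

static void *iothread_fn(void *arg)
{
    pthread_mutex_lock(&aio_ctx_lock);
    sleep(5);                       /* stands in for a blocking io_submit() */
    pthread_mutex_unlock(&aio_ctx_lock);
    return NULL;
}

static void *main_loop_fn(void *arg)
{
    pthread_mutex_lock(&global_mutex);   /* QMP runs under the global mutex */
    printf("main loop: waiting for the AioContext lock...\n");
    pthread_mutex_lock(&aio_ctx_lock);   /* e.g. qmp_query_block() */
    pthread_mutex_unlock(&aio_ctx_lock);
    pthread_mutex_unlock(&global_mutex);
    printf("main loop: done\n");
    return NULL;
}

static void *vcpu_fn(void *arg)
{
    sleep(1);
    printf("vcpu: waiting for the global mutex (guest stalls here)...\n");
    pthread_mutex_lock(&global_mutex);   /* e.g. an MMIO exit */
    pthread_mutex_unlock(&global_mutex);
    printf("vcpu: resumed\n");
    return NULL;
}

int main(void)
{
    pthread_t t1, t2, t3;

    pthread_create(&t1, NULL, iothread_fn, NULL);
    usleep(100 * 1000);             /* let the iothread take its lock first */
    pthread_create(&t2, NULL, main_loop_fn, NULL);
    pthread_create(&t3, NULL, vcpu_fn, NULL);

    pthread_join(t1, NULL);
    pthread_join(t2, NULL);
    pthread_join(t3, NULL);
    return 0;
}

The vCPU stand-in stays blocked for roughly as long as the fake io_submit()
sleeps, which is the window the guest's softlockup watchdog measures.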



* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-09-17 16:01           ` Fam Zheng
@ 2020-09-18 11:23             ` Zhenyu Ye
  2020-09-18 14:06               ` Fam Zheng
  0 siblings, 1 reply; 28+ messages in thread
From: Zhenyu Ye @ 2020-09-18 11:23 UTC (permalink / raw)
  To: Fam Zheng, Stefan Hajnoczi
  Cc: Kevin Wolf, qemu-block, qemu-devel, xiexiangyou, armbru,
	pbonzini, mreitz

Hi Stefan, Fam,

On 2020/9/18 0:01, Fam Zheng wrote:
> On 2020-09-17 16:44, Stefan Hajnoczi wrote:
>> On Thu, Sep 17, 2020 at 03:36:57PM +0800, Zhenyu Ye wrote:
>>> When the hang occurs, the QEMU is blocked at:
>>>
>>> 	#0  0x0000ffff95762b64 in ?? () from target:/usr/lib64/libpthread.so.0
>>> 	#1  0x0000ffff9575bd88 in pthread_mutex_lock () from target:/usr/lib64/libpthread.so.0
>>> 	#2  0x0000aaaabb1f5948 in qemu_mutex_lock_impl (mutex=0xaaaacc8e1860,
>>> 	    file=0xaaaabb4e1bd0 "/Images/eillon/CODE/5-opensource/qemu/util/async.c", line=605)
>>> 	#3  0x0000aaaabb20acd4 in aio_context_acquire (ctx=0xaaaacc8e1800)
>>> 	#4  0x0000aaaabb105e90 in bdrv_query_image_info (bs=0xaaaacc934620,
>>> 	    p_info=0xaaaaccc41e18, errp=0xffffca669118)
>>> 	#5  0x0000aaaabb105968 in bdrv_block_device_info (blk=0xaaaacdca19f0, bs=0xaaaacc934620,
>>> 	    flat=false, errp=0xffffca6692b8)
>>> 	#6  0x0000aaaabb1063dc in bdrv_query_info (blk=0xaaaacdca19f0, p_info=0xaaaacd29c9a8,
>>> 	    errp=0xffffca6692b8)
>>> 	#7  0x0000aaaabb106c14 in qmp_query_block (errp=0x0)
>>> 	#8  0x0000aaaabacb8e6c in hmp_info_block (mon=0xffffca6693d0, qdict=0xaaaacd089790)
>>
>> Great, this shows that the main loop thread is stuck waiting for the
>> AioContext lock.
>>
>> Please post backtraces from all QEMU threads ((gdb) thread apply all bt)
>> so we can figure out which thread is holding up the main loop.
> 
> I think that is reflected in the perf backtrace posted by Zheny already:
> 
> And in the host, the information of sys_enter_io_submit() is:
> 
> Samples: 3K of event 'syscalls:sys_enter_io_submit', Event count
> (approx.): 3150
>    Children      Self  Trace output
>    -   66.70%    66.70%  ctx_id: 0xffff9c044000,
>    nr: 0x00000001, iocbpp: 0xffff9f7fad28
>    0xffffae7f871c
>    0xffffae8a27c4
>    qemu_thread_start
>    iothread_run
>    aio_poll
>    aio_dispatch_ready_handlers
>    aio_dispatch_handler
>    virtio_queue_host_notifier_aio_read
>    virtio_queue_notify_aio_vq
>    virtio_blk_data_plane_handle_output
>    virtio_blk_handle_vq
>    blk_io_unplug
>    bdrv_io_unplug
>    bdrv_io_unplug
>    raw_aio_unplug
>    laio_io_unplug
>    syscall
> 
> So the iothread is blocked by a slow io_submit holding the AioContext
> lock.
> 
> It would be interesting to know what in kernel is blocking io_submit
> from returning.
> 

Sorry, I only got the backtraces of all QEMU threads, as below:

	(gdb) thread apply all bt

	Thread 35 (LWP 49700):
	#0  0x0000ffff83148fe4 in pthread_cond_timedwait () at /lib64/libpthread.so.0
	#1  0x0000aaaae85f2080 in qemu_sem_timedwait (sem=0xaaaaef12e748, ms=10000)
	    at ../util/qemu-thread-posix.c:282
	#2  0x0000aaaae861c600 in worker_thread (opaque=0xaaaaef12e6c0)
	    at ../util/thread-pool.c:91
	#3  0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaf0204be0)
	    at ../util/qemu-thread-posix.c:521
	#4  0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#5  0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 34 (LWP 45337):
	#0  0x0000ffff83148fe4 in pthread_cond_timedwait () at /lib64/libpthread.so.0
	#1  0x0000aaaae85f2080 in qemu_sem_timedwait (sem=0xaaaaef12e748, ms=10000)
	    at ../util/qemu-thread-posix.c:282
	#2  0x0000aaaae861c600 in worker_thread (opaque=0xaaaaef12e6c0)
	    at ../util/thread-pool.c:91
	#3  0x0000aaaae85f269c in qemu_thread_start (args=0xfffcc8000b20)
	    at ../util/qemu-thread-posix.c:521
	#4  0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#5  0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 33 (LWP 45336):
	#0  0x0000ffff83148fe4 in pthread_cond_timedwait () at /lib64/libpthread.so.0
	#1  0x0000aaaae85f2080 in qemu_sem_timedwait (sem=0xaaaaef12e748, ms=10000)
	    at ../util/qemu-thread-posix.c:282
	#2  0x0000aaaae861c600 in worker_thread (opaque=0xaaaaef12e6c0)
	    at ../util/thread-pool.c:91
	#3  0x0000aaaae85f269c in qemu_thread_start (args=0xfffd14000b20)
	    at ../util/qemu-thread-posix.c:521
	#4  0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#5  0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 32 (LWP 45335):
	#0  0x0000ffff83148fe4 in pthread_cond_timedwait () at /lib64/libpthread.so.0
	#1  0x0000aaaae85f2080 in qemu_sem_timedwait (sem=0xaaaaef12e748, ms=10000)
	    at ../util/qemu-thread-posix.c:282
	#2  0x0000aaaae861c600 in worker_thread (opaque=0xaaaaef12e6c0)
	    at ../util/thread-pool.c:91
	#3  0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaefb00f00)
	    at ../util/qemu-thread-posix.c:521
	#4  0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#5  0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 12 (LWP 4849):
	#0  0x0000ffff83082a0c in ioctl () at /lib64/libc.so.6
	#1  0x0000aaaae83738dc in kvm_vcpu_ioctl (cpu=0xaaaaef26c8f0, type=44672)
	    at ../accel/kvm/kvm-all.c:2631
	#2  0x0000aaaae8373144 in kvm_cpu_exec (cpu=0xaaaaef26c8f0) at ../accel/kvm/kvm-all.c:2468
	#3  0x0000aaaae832a994 in qemu_kvm_cpu_thread_fn (arg=0xaaaaef26c8f0)
	    at ../softmmu/cpus.c:1188
	#4  0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaef2c48c0)
	    at ../util/qemu-thread-posix.c:521
	#5  0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#6  0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 11 (LWP 4848):
	#0  0x0000ffff83082a0c in ioctl () at /lib64/libc.so.6
	#1  0x0000aaaae83738dc in kvm_vcpu_ioctl (cpu=0xaaaaef213020, type=44672)
	    at ../accel/kvm/kvm-all.c:2631
	#2  0x0000aaaae8373144 in kvm_cpu_exec (cpu=0xaaaaef213020) at ../accel/kvm/kvm-all.c:2468
	#3  0x0000aaaae832a994 in qemu_kvm_cpu_thread_fn (arg=0xaaaaef213020)
	    at ../softmmu/cpus.c:1188
	#4  0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaef26b810)
	    at ../util/qemu-thread-posix.c:521
	#5  0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#6  0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 10 (LWP 4846):
	#0  0x0000ffff8314c1cc in  () at /lib64/libpthread.so.0
	#1  0x0000ffff83145050 in pthread_mutex_lock () at /lib64/libpthread.so.0
	#2  0x0000aaaae85f1848 in qemu_mutex_lock_impl
	    (mutex=0xaaaae8f594a0 <qemu_global_mutex>, file=0xaaaae8893270 "../exec.c", line=3140)
	    at ../util/qemu-thread-posix.c:79
	#3  0x0000aaaae832bc30 in qemu_mutex_lock_iothread_impl
	    (file=0xaaaae8893270 "../exec.c", line=3140) at ../softmmu/cpus.c:1782
	#4  0x0000aaaae84186cc in prepare_mmio_access (mr=0xaaaaf07f2540) at ../exec.c:3140
	#5  0x0000aaaae8418788 in flatview_write_continue
	    (fv=0xfffd3c438780, addr=549772603392, attrs=..., ptr=0xffff83f3a028, len=2, addr1=0, l=2, mr=0xaaaaf07f2540) at ../exec.c:3172
	#6  0x0000aaaae8418980 in flatview_write
	    (fv=0xfffd3c438780, addr=549772603392, attrs=..., buf=0xffff83f3a028, len=2)
	    at ../exec.c:3217
	#7  0x0000aaaae8418d68 in address_space_write
	    (as=0xaaaae8f75fd0 <address_space_memory>, addr=549772603392, attrs=..., buf=0xffff83f3a028, len=2) at ../exec.c:3308
	#8  0x0000aaaae8418de4 in address_space_rw
	    (as=0xaaaae8f75fd0 <address_space_memory>, addr=549772603392, attrs=..., buf=0xffff83f3a028, len=2, is_write=true) at ../exec.c:3318
	#9  0x0000aaaae8373388 in kvm_cpu_exec (cpu=0xaaaaef1b9b80) at ../accel/kvm/kvm-all.c:2518
	#10 0x0000aaaae832a994 in qemu_kvm_cpu_thread_fn (arg=0xaaaaef1b9b80)
	    at ../softmmu/cpus.c:1188
	#11 0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaef211f40)
	    at ../util/qemu-thread-posix.c:521
	#12 0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#13 0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 9 (LWP 4845):
	#0  0x0000ffff83082a0c in ioctl () at /lib64/libc.so.6
	#1  0x0000aaaae83738dc in kvm_vcpu_ioctl (cpu=0xaaaaef15a0f0, type=44672)
	    at ../accel/kvm/kvm-all.c:2631
	#2  0x0000aaaae8373144 in kvm_cpu_exec (cpu=0xaaaaef15a0f0) at ../accel/kvm/kvm-all.c:2468
	#3  0x0000aaaae832a994 in qemu_kvm_cpu_thread_fn (arg=0xaaaaef15a0f0)
	    at ../softmmu/cpus.c:1188
	#4  0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaef1b1e40)
	    at ../util/qemu-thread-posix.c:521
	#5  0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#6  0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 8 (LWP 4826):
	#0  0x0000ffff83080b04 in poll () at /lib64/libc.so.6
	#1  0x0000ffff83807648 in  () at /lib64/libglib-2.0.so.0
	#2  0x0000ffff83807a88 in g_main_loop_run () at /lib64/libglib-2.0.so.0
	#3  0x0000aaaae8027038 in iothread_run (opaque=0xaaaaef1141b0) at ../iothread.c:80
	#4  0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaef10dc00)
	    at ../util/qemu-thread-posix.c:521
	#5  0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#6  0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 7 (LWP 4805):
	#0  0x0000ffff83080c18 in ppoll () at /lib64/libc.so.6
	#1  0x0000aaaae860dc4c in qemu_poll_ns (fds=0xffff68001fb0, nfds=6, timeout=-1)
	    at ../util/qemu-timer.c:335
	#2  0x0000aaaae86228fc in fdmon_poll_wait
	    (ctx=0xaaaaef0aaf50, ready_list=0xffff72ffbf70, timeout=-1) at ../util/fdmon-poll.c:79
	#3  0x0000aaaae85fb270 in aio_poll (ctx=0xaaaaef0aaf50, blocking=true)
	    at ../util/aio-posix.c:601
	#4  0x0000aaaae8027004 in iothread_run (opaque=0xaaaaeee62c00) at ../iothread.c:73
	#5  0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaef0ab4b0)
	    at ../util/qemu-thread-posix.c:521
	#6  0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#7  0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 6 (LWP 4804):
	#0  0x0000ffff83080c18 in ppoll () at /lib64/libc.so.6
	#1  0x0000aaaae860dc4c in qemu_poll_ns (fds=0xffff64001fb0, nfds=6, timeout=-1)
	    at ../util/qemu-timer.c:335
	#2  0x0000aaaae86228fc in fdmon_poll_wait
	    (ctx=0xaaaaef0a9930, ready_list=0xffff737fcf70, timeout=-1) at ../util/fdmon-poll.c:79
	#3  0x0000aaaae85fb270 in aio_poll (ctx=0xaaaaef0a9930, blocking=true)
	    at ../util/aio-posix.c:601
	#4  0x0000aaaae8027004 in iothread_run (opaque=0xaaaaeee6f400) at ../iothread.c:73
	#5  0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaef0aab20)
	    at ../util/qemu-thread-posix.c:521
	#6  0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#7  0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 5 (LWP 4802):
	#0  0x0000ffff83086b54 in syscall () at /lib64/libc.so.6
	#1  0x0000ffff834598b8 in io_submit () at /lib64/libaio.so.1
	#2  0x0000aaaae851e89c in ioq_submit (s=0xfffd3c001bb0) at ../block/linux-aio.c:299
	#3  0x0000aaaae851eb50 in laio_io_unplug (bs=0xaaaaef0f2340, s=0xfffd3c001bb0)
	    at ../block/linux-aio.c:344
	#4  0x0000aaaae8559f1c in raw_aio_unplug (bs=0xaaaaef0f2340) at ../block/file-posix.c:2063
	#5  0x0000aaaae8538344 in bdrv_io_unplug (bs=0xaaaaef0f2340) at ../block/io.c:3135
	#6  0x0000aaaae8538360 in bdrv_io_unplug (bs=0xaaaaef0eb020) at ../block/io.c:3140
	#7  0x0000aaaae8496104 in blk_io_unplug (blk=0xaaaaef0e8f20)
	    at ../block/block-backend.c:2147
	#8  0x0000aaaae830e1a4 in virtio_blk_handle_vq (s=0xaaaaf0374280, vq=0xffff700fc1d8)
	    at ../hw/block/virtio-blk.c:796
	#9  0x0000aaaae82e6b68 in virtio_blk_data_plane_handle_output
	    (vdev=0xaaaaf0374280, vq=0xffff700fc1d8) at ../hw/block/dataplane/virtio-blk.c:165
	#10 0x0000aaaae83878fc in virtio_queue_notify_aio_vq (vq=0xffff700fc1d8)
	    at ../hw/virtio/virtio.c:2325
	#11 0x0000aaaae838ab50 in virtio_queue_host_notifier_aio_poll (opaque=0xffff700fc250)
	    at ../hw/virtio/virtio.c:3545
	#12 0x0000aaaae85fab3c in run_poll_handlers_once
	    (ctx=0xaaaaef0a87b0, now=77604310618960, timeout=0xffff73ffdf78)
	    at ../util/aio-posix.c:398
	#13 0x0000aaaae85fae5c in run_poll_handlers
	    (ctx=0xaaaaef0a87b0, max_ns=4000, timeout=0xffff73ffdf78) at ../util/aio-posix.c:492
	#14 0x0000aaaae85fb078 in try_poll_mode (ctx=0xaaaaef0a87b0, timeout=0xffff73ffdf78)
	    at ../util/aio-posix.c:535
	#15 0x0000aaaae85fb180 in aio_poll (ctx=0xaaaaef0a87b0, blocking=true)
	    at ../util/aio-posix.c:571
	#16 0x0000aaaae8027004 in iothread_run (opaque=0xaaaaeee79a00) at ../iothread.c:73
	#17 0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaef0a8d10)
	    at ../util/qemu-thread-posix.c:521
	#18 0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#19 0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 4 (LWP 4801):
	#0  0x0000ffff83086b54 in syscall () at /lib64/libc.so.6
	#1  0x0000ffff834598b8 in io_submit () at /lib64/libaio.so.1
	#2  0x0000aaaae851e89c in ioq_submit (s=0xfffd3c4328e0) at ../block/linux-aio.c:299
	#3  0x0000aaaae851eb50 in laio_io_unplug (bs=0xaaaaef0e3860, s=0xfffd3c4328e0)
	    at ../block/linux-aio.c:344
	#4  0x0000aaaae8559f1c in raw_aio_unplug (bs=0xaaaaef0e3860) at ../block/file-posix.c:2063
	#5  0x0000aaaae8538344 in bdrv_io_unplug (bs=0xaaaaef0e3860) at ../block/io.c:3135
	#6  0x0000aaaae8538360 in bdrv_io_unplug (bs=0xaaaaef0dc540) at ../block/io.c:3140
	#7  0x0000aaaae8496104 in blk_io_unplug (blk=0xaaaaef0aea90)
	    at ../block/block-backend.c:2147
	#8  0x0000aaaae830e1a4 in virtio_blk_handle_vq (s=0xaaaaf0252b90, vq=0xffff70224010)
	    at ../hw/block/virtio-blk.c:796
	#9  0x0000aaaae82e6b68 in virtio_blk_data_plane_handle_output
	    (vdev=0xaaaaf0252b90, vq=0xffff70224010) at ../hw/block/dataplane/virtio-blk.c:165
	#10 0x0000aaaae83878fc in virtio_queue_notify_aio_vq (vq=0xffff70224010)
	    at ../hw/virtio/virtio.c:2325
	#11 0x0000aaaae838ab50 in virtio_queue_host_notifier_aio_poll (opaque=0xffff70224088)
	    at ../hw/virtio/virtio.c:3545
	#12 0x0000aaaae85fa360 in poll_set_started (ctx=0xaaaaef0a7e70, started=false)
	    at ../util/aio-posix.c:231
	#13 0x0000aaaae85fb098 in try_poll_mode (ctx=0xaaaaef0a7e70, timeout=0xffff8096bf78)
	    at ../util/aio-posix.c:540
	#14 0x0000aaaae85fb180 in aio_poll (ctx=0xaaaaef0a7e70, blocking=true)
	    at ../util/aio-posix.c:571
	#15 0x0000aaaae8027004 in iothread_run (opaque=0xaaaaeee8de20) at ../iothread.c:73
	#16 0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaef0a83d0)
	    at ../util/qemu-thread-posix.c:521
	#17 0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#18 0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 3 (LWP 4800):
	#0  0x0000ffff82fe8fd4 in sigtimedwait () at /lib64/libc.so.6
	#1  0x0000ffff8314d78c in sigwait () at /lib64/libpthread.so.0
	#2  0x0000aaaae860e974 in sigwait_compat (opaque=0xaaaaeee06e70) at ../util/compatfd.c:37
	#3  0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaef092860)
	    at ../util/qemu-thread-posix.c:521
	#4  0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#5  0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 2 (LWP 4779):
	#0  0x0000ffff83086b50 in syscall () at /lib64/libc.so.6
	#1  0x0000aaaae85f2278 in qemu_futex_wait
	    (f=0xaaaae8f78a58 <rcu_call_ready_event>, val=4294967295)
	    at /Images/eillon/CODE/5-opensource/qemu/include/qemu/futex.h:29
	#2  0x0000aaaae85f24f4 in qemu_event_wait (ev=0xaaaae8f78a58 <rcu_call_ready_event>)
	    at ../util/qemu-thread-posix.c:460
	#3  0x0000aaaae85d2938 in call_rcu_thread (opaque=0x0) at ../util/rcu.c:258
	#4  0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaeed38e20)
	    at ../util/qemu-thread-posix.c:521
	#5  0x0000ffff831428bc in  () at /lib64/libpthread.so.0
	#6  0x0000ffff8308aa1c in  () at /lib64/libc.so.6

	Thread 1 (LWP 4720):
	#0  0x0000ffff8314c1cc in  () at /lib64/libpthread.so.0
	#1  0x0000ffff83145060 in pthread_mutex_lock () at /lib64/libpthread.so.0
	#2  0x0000aaaae85f1848 in qemu_mutex_lock_impl
	    (mutex=0xaaaaef0a8810, file=0xaaaae89026d0 "../util/async.c", line=615)
	    at ../util/qemu-thread-posix.c:79
	#3  0x0000aaaae86089d4 in aio_context_acquire (ctx=0xaaaaef0a87b0) at ../util/async.c:615
	#4  0x0000aaaae84df9b8 in bdrv_query_image_info
	    (bs=0xaaaaef0eb020, p_info=0xaaaaef13c188, errp=0xffffeb48ee60) at ../block/qapi.c:278
	#5  0x0000aaaae84df4e8 in bdrv_block_device_info
	    (blk=0xaaaaef0e8f20, bs=0xaaaaef0eb020, flat=false, errp=0xffffeb48f008)
	    at ../block/qapi.c:155
	#6  0x0000aaaae84dfed0 in bdrv_query_info
	    (blk=0xaaaaef0e8f20, p_info=0xaaaaef5109f8, errp=0xffffeb48f008)
	    at ../block/qapi.c:402
	#7  0x0000aaaae84e073c in qmp_query_block (errp=0x0) at ../block/qapi.c:621
	#8  0x0000aaaae7e4e254 in hmp_info_block (mon=0xffffeb48f120, qdict=0xaaaaf08757b0)
	    at ../block/monitor/block-hmp-cmds.c:755
	#9  0x0000aaaae7d38d58 in handle_hmp_command
	    (mon=0xffffeb48f120, cmdline=0xaaaaef63093a "") at ../monitor/hmp.c:1082
	#10 0x0000aaaae8305ac0 in qmp_human_monitor_command
	    (command_line=0xaaaaef630930 "info block", has_cpu_index=false, cpu_index=0, errp=0xffffeb48f228) at ../monitor/misc.c:141
	#11 0x0000aaaae85ad944 in qmp_marshal_human_monitor_command
	    (args=0xffff5c0088b0, ret=0xffffeb48f2c0, errp=0xffffeb48f2b8)
	    at qapi/qapi-commands-misc.c:653
	#12 0x0000aaaae861be8c in qmp_dispatch
	    (cmds=0xaaaae8f76718 <qmp_commands>, request=0xffff5c007890, allow_oob=false)
	    at ../qapi/qmp-dispatch.c:155
	#13 0x0000aaaae844d204 in monitor_qmp_dispatch (mon=0xaaaaef10d460, req=0xffff5c007890)
	    at ../monitor/qmp.c:145
	#14 0x0000aaaae844d620 in monitor_qmp_bh_dispatcher (data=0x0) at ../monitor/qmp.c:234
	#15 0x0000aaaae8607a44 in aio_bh_call (bh=0xaaaaeed99790) at ../util/async.c:136
	#16 0x0000aaaae8607b50 in aio_bh_poll (ctx=0xaaaaeedd6e50) at ../util/async.c:164
	#17 0x0000aaaae85faacc in aio_dispatch (ctx=0xaaaaeedd6e50) at ../util/aio-posix.c:380
	#18 0x0000aaaae8608030 in aio_ctx_dispatch
	    (source=0xaaaaeedd6e50, callback=0x0, user_data=0x0) at ../util/async.c:306
	#19 0x0000ffff838072f8 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
	#20 0x0000aaaae85d8ed8 in glib_pollfds_poll () at ../util/main-loop.c:221
	#21 0x0000aaaae85d8f60 in os_host_main_loop_wait (timeout=64819060)
	    at ../util/main-loop.c:244
	#22 0x0000aaaae85d9080 in main_loop_wait (nonblocking=0) at ../util/main-loop.c:520
	#23 0x0000aaaae82ec2b4 in qemu_main_loop () at ../softmmu/vl.c:1676
	#24 0x0000aaaae7c9b9c4 in main (argc=103, argv=0xffffeb48f7b8, envp=0xffffeb48faf8)
	    at ../softmmu/main.c:50
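
By the way, this kind of all-thread dump can also be captured
non-interactively, e.g. (with <qemu-pid> as a placeholder for the QEMU
process ID):

	host# gdb --batch -p <qemu-pid> -ex 'thread apply all bt'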

Thanks,
Zhenyu


* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-09-18 11:23             ` Zhenyu Ye
@ 2020-09-18 14:06               ` Fam Zheng
  2020-09-19  2:22                 ` Zhenyu Ye
  0 siblings, 1 reply; 28+ messages in thread
From: Fam Zheng @ 2020-09-18 14:06 UTC (permalink / raw)
  To: Zhenyu Ye
  Cc: Kevin Wolf, qemu-block, armbru, xiexiangyou, qemu-devel,
	Stefan Hajnoczi, pbonzini, mreitz

On 2020-09-18 19:23, Zhenyu Ye wrote:
> 	Thread 5 (LWP 4802):
> 	#0  0x0000ffff83086b54 in syscall () at /lib64/libc.so.6
> 	#1  0x0000ffff834598b8 in io_submit () at /lib64/libaio.so.1
> 	#2  0x0000aaaae851e89c in ioq_submit (s=0xfffd3c001bb0) at ../block/linux-aio.c:299
> 	#3  0x0000aaaae851eb50 in laio_io_unplug (bs=0xaaaaef0f2340, s=0xfffd3c001bb0)
> 	    at ../block/linux-aio.c:344
> 	#4  0x0000aaaae8559f1c in raw_aio_unplug (bs=0xaaaaef0f2340) at ../block/file-posix.c:2063
> 	#5  0x0000aaaae8538344 in bdrv_io_unplug (bs=0xaaaaef0f2340) at ../block/io.c:3135
> 	#6  0x0000aaaae8538360 in bdrv_io_unplug (bs=0xaaaaef0eb020) at ../block/io.c:3140
> 	#7  0x0000aaaae8496104 in blk_io_unplug (blk=0xaaaaef0e8f20)
> 	    at ../block/block-backend.c:2147
> 	#8  0x0000aaaae830e1a4 in virtio_blk_handle_vq (s=0xaaaaf0374280, vq=0xffff700fc1d8)
> 	    at ../hw/block/virtio-blk.c:796
> 	#9  0x0000aaaae82e6b68 in virtio_blk_data_plane_handle_output
> 	    (vdev=0xaaaaf0374280, vq=0xffff700fc1d8) at ../hw/block/dataplane/virtio-blk.c:165
> 	#10 0x0000aaaae83878fc in virtio_queue_notify_aio_vq (vq=0xffff700fc1d8)
> 	    at ../hw/virtio/virtio.c:2325
> 	#11 0x0000aaaae838ab50 in virtio_queue_host_notifier_aio_poll (opaque=0xffff700fc250)
> 	    at ../hw/virtio/virtio.c:3545
> 	#12 0x0000aaaae85fab3c in run_poll_handlers_once
> 	    (ctx=0xaaaaef0a87b0, now=77604310618960, timeout=0xffff73ffdf78)
> 	    at ../util/aio-posix.c:398
> 	#13 0x0000aaaae85fae5c in run_poll_handlers
> 	    (ctx=0xaaaaef0a87b0, max_ns=4000, timeout=0xffff73ffdf78) at ../util/aio-posix.c:492
> 	#14 0x0000aaaae85fb078 in try_poll_mode (ctx=0xaaaaef0a87b0, timeout=0xffff73ffdf78)
> 	    at ../util/aio-posix.c:535
> 	#15 0x0000aaaae85fb180 in aio_poll (ctx=0xaaaaef0a87b0, blocking=true)
> 	    at ../util/aio-posix.c:571
> 	#16 0x0000aaaae8027004 in iothread_run (opaque=0xaaaaeee79a00) at ../iothread.c:73
> 	#17 0x0000aaaae85f269c in qemu_thread_start (args=0xaaaaef0a8d10)
> 	    at ../util/qemu-thread-posix.c:521
> 	#18 0x0000ffff831428bc in  () at /lib64/libpthread.so.0
> 	#19 0x0000ffff8308aa1c in  () at /lib64/libc.so.6

I can see how blocking in a slow io_submit can cause trouble for the
main thread. I think one way to fix it (until it's made truly async in
new kernels) is to move the io_submit call to a thread pool, wrapped in
a coroutine, perhaps.
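
Just to sketch that idea (untested; LaioSubmitData and
laio_co_submit_offload are names made up here, while
thread_pool_submit_co() and aio_get_thread_pool() are QEMU's existing
thread-pool helpers, and io_submit() is libaio's):

	/* Assumes the usual linux-aio.c includes plus "block/aio.h"
	 * (aio_get_thread_pool) and "block/thread-pool.h"
	 * (thread_pool_submit_co), and <libaio.h>. */

	typedef struct {
	    io_context_t aio_ctx;   /* libaio context */
	    int nr;
	    struct iocb **iocbs;
	} LaioSubmitData;

	/* Runs in a thread-pool worker thread, so a slow io_submit() no
	 * longer stalls the event loop that is holding the AioContext
	 * lock. */
	static int laio_submit_worker(void *opaque)
	{
	    LaioSubmitData *data = opaque;

	    return io_submit(data->aio_ctx, data->nr, data->iocbs);
	}

	/* Coroutine context: yields until the worker finishes, then
	 * returns io_submit()'s result (number submitted, or -errno). */
	static int coroutine_fn laio_co_submit_offload(AioContext *ctx,
	                                               io_context_t aio_ctx,
	                                               int nr,
	                                               struct iocb **iocbs)
	{
	    LaioSubmitData data = {
	        .aio_ctx = aio_ctx,
	        .nr      = nr,
	        .iocbs   = iocbs,
	    };

	    return thread_pool_submit_co(aio_get_thread_pool(ctx),
	                                 laio_submit_worker, &data);
	}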

I'm not sure the QMP timeout is a complete solution, because in this
exact situation we would still be stuck in a blocked state for the whole
period before the timeout fires.

Fam


* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-09-18 14:06               ` Fam Zheng
@ 2020-09-19  2:22                 ` Zhenyu Ye
  2020-09-21 11:14                   ` Fam Zheng
  0 siblings, 1 reply; 28+ messages in thread
From: Zhenyu Ye @ 2020-09-19  2:22 UTC (permalink / raw)
  To: Fam Zheng
  Cc: Kevin Wolf, qemu-block, armbru, xiexiangyou, qemu-devel,
	Stefan Hajnoczi, pbonzini, mreitz

On 2020/9/18 22:06, Fam Zheng wrote:
> 
> I can see how blocking in a slow io_submit can cause trouble for main
> thread. I think one way to fix it (until it's made truly async in new
> kernels) is moving the io_submit call to thread pool, and wrapped in a
> coroutine, perhaps.
>

I'm not sure whether any operation other than io_submit() will block
the main thread.

> I'm not sure qmp timeout is a complete solution because we would still
> suffer from a blocked state for a period, in this exact situation before
> the timeout.

Anyway, the QMP timeout may be the last-resort measure to prevent the
VM soft lockup.  Ideally, after your io_submit fix, this mechanism would
never be triggered.

Thanks,
Zhenyu


* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-09-19  2:22                 ` Zhenyu Ye
@ 2020-09-21 11:14                   ` Fam Zheng
  2020-10-13 10:00                     ` Stefan Hajnoczi
  0 siblings, 1 reply; 28+ messages in thread
From: Fam Zheng @ 2020-09-21 11:14 UTC (permalink / raw)
  To: Zhenyu Ye
  Cc: Kevin Wolf, qemu-block, qemu-devel, xiexiangyou, armbru,
	Stefan Hajnoczi, pbonzini, mreitz

On 2020-09-19 10:22, Zhenyu Ye wrote:
> On 2020/9/18 22:06, Fam Zheng wrote:
> > 
> > I can see how blocking in a slow io_submit can cause trouble for main
> > thread. I think one way to fix it (until it's made truly async in new
> > kernels) is moving the io_submit call to thread pool, and wrapped in a
> > coroutine, perhaps.
> >
> 
> I'm not sure if any other operation will block the main thread, other
> than io_submit().

Then that's a problem with io_submit, which should be fixed. Or more
precisely, that is a long-held lock that we should avoid in QEMU's event
loops.

> 
> > I'm not sure qmp timeout is a complete solution because we would still
> > suffer from a blocked state for a period, in this exact situation before
> > the timeout.
> 
> Anyway, the qmp timeout may be the last measure to prevent the VM
> soft lockup. 

Maybe, but I don't think baking such a workaround into the QMP API is a
good idea. No QMP command should be synchronously long-running, so
having a timeout parameter is simply the wrong design.

Thanks,

Fam


* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-09-21 11:14                   ` Fam Zheng
@ 2020-10-13 10:00                     ` Stefan Hajnoczi
  2020-10-19 12:40                       ` Zhenyu Ye
  0 siblings, 1 reply; 28+ messages in thread
From: Stefan Hajnoczi @ 2020-10-13 10:00 UTC (permalink / raw)
  To: Zhenyu Ye
  Cc: fam, Kevin Wolf, qemu-block, qemu-devel, xiexiangyou, armbru,
	pbonzini, mreitz

On Mon, Sep 21, 2020 at 11:14:35AM +0000, Fam Zheng wrote:
> On 2020-09-19 10:22, Zhenyu Ye wrote:
> > On 2020/9/18 22:06, Fam Zheng wrote:
> > > 
> > > I can see how blocking in a slow io_submit can cause trouble for main
> > > thread. I think one way to fix it (until it's made truly async in new
> > > kernels) is moving the io_submit call to thread pool, and wrapped in a
> > > coroutine, perhaps.
> > >
> > 
> > I'm not sure if any other operation will block the main thread, other
> > than io_submit().
> 
> Then that's a problem with io_submit which should be fixed. Or more
> precisely, that is a long held lock that we should avoid in QEMU's event
> loops.
> 
> > 
> > > I'm not sure qmp timeout is a complete solution because we would still
> > > suffer from a blocked state for a period, in this exact situation before
> > > the timeout.
> > 
> > Anyway, the qmp timeout may be the last measure to prevent the VM
> > soft lockup. 
> 
> Maybe, but I don't think baking such a workaround into the QMP API is a
> good idea. No QMP command should be synchronously long running, so
> having a timeout parameter is just a wrong design.

Sorry, I lost track of this ongoing email thread.

Thanks for the backtrace. It shows that the io_submit call is made while
the AioContext lock is held. The monitor thread is waiting for the
IOThread's AioContext lock. vCPU threads can get stuck waiting on the
big QEMU lock (BQL) that is held by the monitor in the meantime.

Please collect the kernel backtrace for io_submit so we can understand
why multi-second io_submit latencies happen.
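
For example, on the host while the hang is in progress (the <qemu-pid>
and <iothread-tid> values below are placeholders), either the /proc
stack file or bcc's offcputime should show where io_submit sleeps:

	host# cat /proc/<qemu-pid>/task/<iothread-tid>/stack
	host# ./offcputime -K -p <qemu-pid> 30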

I also suggest trying aio=io_uring to check if Linux io_uring avoids the
latency problem.
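
For instance (assuming QEMU was built with io_uring support, i.e.
liburing was available at configure time), switching the existing drive
option from aio=native to aio=io_uring; the path and id below are
placeholders:

	-drive file=/path/to/disk.img,if=none,id=drive0,format=raw,cache=none,aio=io_uring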

Stefan


* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-10-13 10:00                     ` Stefan Hajnoczi
@ 2020-10-19 12:40                       ` Zhenyu Ye
  2020-10-19 13:25                         ` Paolo Bonzini
  0 siblings, 1 reply; 28+ messages in thread
From: Zhenyu Ye @ 2020-10-19 12:40 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: fam, Kevin Wolf, Zhanghailiang, qemu-block, qemu-devel,
	xiexiangyou, armbru, pbonzini, mreitz

Hi Stefan,

On 2020/10/13 18:00, Stefan Hajnoczi wrote:
> 
> Sorry, I lost track of this on-going email thread.
> 
> Thanks for the backtrace. It shows the io_submit call is done while the
> AioContext lock is held. The monitor thread is waiting for the
> IOThread's AioContext lock. vcpus threads can get stuck waiting on the
> big QEMU lock (BQL) that is held by the monitor in the meantime.
> 
> Please collect the kernel backtrace for io_submit so we can understand
> why multi-second io_submit latencies happen.
> 
> I also suggest trying aio=io_uring to check if Linux io_uring avoids the
> latency problem.
> 

The kernel backtrace for io_submit in GUEST is:

	guest# ./offcputime -K -p `pgrep -nx fio`
	    b'finish_task_switch'
	    b'__schedule'
	    b'schedule'
	    b'io_schedule'
	    b'blk_mq_get_tag'
	    b'blk_mq_get_request'
	    b'blk_mq_make_request'
	    b'generic_make_request'
	    b'submit_bio'
	    b'blkdev_direct_IO'
	    b'generic_file_read_iter'
	    b'aio_read'
	    b'io_submit_one'
	    b'__x64_sys_io_submit'
	    b'do_syscall_64'
	    b'entry_SYSCALL_64_after_hwframe'
	    -                fio (1464)
		40031912

And Linux io_uring can avoid the latency problem.

Thanks,
Zhenyu


* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-10-19 12:40                       ` Zhenyu Ye
@ 2020-10-19 13:25                         ` Paolo Bonzini
  2020-10-20  1:34                           ` Zhenyu Ye
  0 siblings, 1 reply; 28+ messages in thread
From: Paolo Bonzini @ 2020-10-19 13:25 UTC (permalink / raw)
  To: Zhenyu Ye, Stefan Hajnoczi
  Cc: fam, Kevin Wolf, Zhanghailiang, qemu-block, qemu-devel,
	xiexiangyou, armbru, mreitz

On 19/10/20 14:40, Zhenyu Ye wrote:
> The kernel backtrace for io_submit in GUEST is:
> 
> 	guest# ./offcputime -K -p `pgrep -nx fio`
> 	    b'finish_task_switch'
> 	    b'__schedule'
> 	    b'schedule'
> 	    b'io_schedule'
> 	    b'blk_mq_get_tag'
> 	    b'blk_mq_get_request'
> 	    b'blk_mq_make_request'
> 	    b'generic_make_request'
> 	    b'submit_bio'
> 	    b'blkdev_direct_IO'
> 	    b'generic_file_read_iter'
> 	    b'aio_read'
> 	    b'io_submit_one'
> 	    b'__x64_sys_io_submit'
> 	    b'do_syscall_64'
> 	    b'entry_SYSCALL_64_after_hwframe'
> 	    -                fio (1464)
> 		40031912
> 
> And Linux io_uring can avoid the latency problem.

What filesystem are you using?

Paolo



* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-10-19 13:25                         ` Paolo Bonzini
@ 2020-10-20  1:34                           ` Zhenyu Ye
  2020-10-22 16:29                             ` Fam Zheng
  0 siblings, 1 reply; 28+ messages in thread
From: Zhenyu Ye @ 2020-10-20  1:34 UTC (permalink / raw)
  To: Paolo Bonzini, Stefan Hajnoczi
  Cc: fam, Kevin Wolf, Zhanghailiang, qemu-block, qemu-devel,
	xiexiangyou, armbru, mreitz

On 2020/10/19 21:25, Paolo Bonzini wrote:
> On 19/10/20 14:40, Zhenyu Ye wrote:
>> The kernel backtrace for io_submit in GUEST is:
>>
>> 	guest# ./offcputime -K -p `pgrep -nx fio`
>> 	    b'finish_task_switch'
>> 	    b'__schedule'
>> 	    b'schedule'
>> 	    b'io_schedule'
>> 	    b'blk_mq_get_tag'
>> 	    b'blk_mq_get_request'
>> 	    b'blk_mq_make_request'
>> 	    b'generic_make_request'
>> 	    b'submit_bio'
>> 	    b'blkdev_direct_IO'
>> 	    b'generic_file_read_iter'
>> 	    b'aio_read'
>> 	    b'io_submit_one'
>> 	    b'__x64_sys_io_submit'
>> 	    b'do_syscall_64'
>> 	    b'entry_SYSCALL_64_after_hwframe'
>> 	    -                fio (1464)
>> 		40031912
>>
>> And Linux io_uring can avoid the latency problem.
> 
> What filesystem are you using?
> 

On the host, the VM image and disk images are stored on an ext4
filesystem.  In the guest, '/' uses an xfs filesystem, and the disks are
raw devices.

guest# df -hT
Filesystem              Type      Size  Used Avail Use% Mounted on
devtmpfs                devtmpfs   16G     0   16G   0% /dev
tmpfs                   tmpfs      16G     0   16G   0% /dev/shm
tmpfs                   tmpfs      16G  976K   16G   1% /run
/dev/mapper/fedora-root xfs       8.0G  3.2G  4.9G  40% /
tmpfs                   tmpfs      16G     0   16G   0% /tmp
/dev/sda1               xfs      1014M  181M  834M  18% /boot
tmpfs                   tmpfs     3.2G     0  3.2G   0% /run/user/0

guest# lsblk
NAME            MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda               8:0    0  10G  0 disk
├─sda1            8:1    0   1G  0 part /boot
└─sda2            8:2    0   9G  0 part
  ├─fedora-root 253:0    0   8G  0 lvm  /
  └─fedora-swap 253:1    0   1G  0 lvm  [SWAP]
vda             252:0    0  10G  0 disk
vdb             252:16   0  10G  0 disk
vdc             252:32   0  10G  0 disk
vdd             252:48   0  10G  0 disk

Thanks,
Zhenyu


* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-10-20  1:34                           ` Zhenyu Ye
@ 2020-10-22 16:29                             ` Fam Zheng
  2020-12-08 13:10                               ` Stefan Hajnoczi
  0 siblings, 1 reply; 28+ messages in thread
From: Fam Zheng @ 2020-10-22 16:29 UTC (permalink / raw)
  To: Zhenyu Ye, Paolo Bonzini, Stefan Hajnoczi
  Cc: Kevin Wolf, Zhanghailiang, qemu-block, armbru, xiexiangyou,
	qemu-devel, mreitz

On Tue, 2020-10-20 at 09:34 +0800, Zhenyu Ye wrote:
> On 2020/10/19 21:25, Paolo Bonzini wrote:
> > On 19/10/20 14:40, Zhenyu Ye wrote:
> > > The kernel backtrace for io_submit in GUEST is:
> > > 
> > > 	guest# ./offcputime -K -p `pgrep -nx fio`
> > > 	    b'finish_task_switch'
> > > 	    b'__schedule'
> > > 	    b'schedule'
> > > 	    b'io_schedule'
> > > 	    b'blk_mq_get_tag'
> > > 	    b'blk_mq_get_request'
> > > 	    b'blk_mq_make_request'
> > > 	    b'generic_make_request'
> > > 	    b'submit_bio'
> > > 	    b'blkdev_direct_IO'
> > > 	    b'generic_file_read_iter'
> > > 	    b'aio_read'
> > > 	    b'io_submit_one'
> > > 	    b'__x64_sys_io_submit'
> > > 	    b'do_syscall_64'
> > > 	    b'entry_SYSCALL_64_after_hwframe'
> > > 	    -                fio (1464)
> > > 		40031912
> > > 
> > > And Linux io_uring can avoid the latency problem.

Thanks for the info. What this tells us is basically that the number of
in-flight requests is high. It's sad that linux-aio is in practice
implemented as a blocking API.

A host-side backtrace will be of more help. Can you get that too?

Fam

> > 
> > What filesystem are you using?
> > 
> 
> On host, the VM image and disk images are based on ext4 filesystem.
> In guest, the '/' uses xfs filesystem, and the disks are raw devices.
> 
> guest# df -hT
> Filesystem              Type      Size  Used Avail Use% Mounted on
> devtmpfs                devtmpfs   16G     0   16G   0% /dev
> tmpfs                   tmpfs      16G     0   16G   0% /dev/shm
> tmpfs                   tmpfs      16G  976K   16G   1% /run
> /dev/mapper/fedora-root xfs       8.0G  3.2G  4.9G  40% /
> tmpfs                   tmpfs      16G     0   16G   0% /tmp
> /dev/sda1               xfs      1014M  181M  834M  18% /boot
> tmpfs                   tmpfs     3.2G     0  3.2G   0% /run/user/0
> 
> guest# lsblk
> NAME            MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
> sda               8:0    0  10G  0 disk
> ├─sda1            8:1    0   1G  0 part /boot
> └─sda2            8:2    0   9G  0 part
>   ├─fedora-root 253:0    0   8G  0 lvm  /
>   └─fedora-swap 253:1    0   1G  0 lvm  [SWAP]
> vda             252:0    0  10G  0 disk
> vdb             252:16   0  10G  0 disk
> vdc             252:32   0  10G  0 disk
> vdd             252:48   0  10G  0 disk
> 
> Thanks,
> Zhenyu
> 



* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-10-22 16:29                             ` Fam Zheng
@ 2020-12-08 13:10                               ` Stefan Hajnoczi
  2020-12-08 13:47                                 ` Glauber Costa
  0 siblings, 1 reply; 28+ messages in thread
From: Stefan Hajnoczi @ 2020-12-08 13:10 UTC (permalink / raw)
  To: Fam Zheng
  Cc: Kevin Wolf, Zhanghailiang, qemu-block, Zhenyu Ye, qemu-devel,
	xiexiangyou, armbru, Glauber Costa, Paolo Bonzini, mreitz

On Thu, Oct 22, 2020 at 05:29:16PM +0100, Fam Zheng wrote:
> On Tue, 2020-10-20 at 09:34 +0800, Zhenyu Ye wrote:
> > On 2020/10/19 21:25, Paolo Bonzini wrote:
> > > On 19/10/20 14:40, Zhenyu Ye wrote:
> > > > The kernel backtrace for io_submit in GUEST is:
> > > > 
> > > > 	guest# ./offcputime -K -p `pgrep -nx fio`
> > > > 	    b'finish_task_switch'
> > > > 	    b'__schedule'
> > > > 	    b'schedule'
> > > > 	    b'io_schedule'
> > > > 	    b'blk_mq_get_tag'
> > > > 	    b'blk_mq_get_request'
> > > > 	    b'blk_mq_make_request'
> > > > 	    b'generic_make_request'
> > > > 	    b'submit_bio'
> > > > 	    b'blkdev_direct_IO'
> > > > 	    b'generic_file_read_iter'
> > > > 	    b'aio_read'
> > > > 	    b'io_submit_one'
> > > > 	    b'__x64_sys_io_submit'
> > > > 	    b'do_syscall_64'
> > > > 	    b'entry_SYSCALL_64_after_hwframe'
> > > > 	    -                fio (1464)
> > > > 		40031912
> > > > 
> > > > And Linux io_uring can avoid the latency problem.
> 
> Thanks for the info. What this tells us is basically the inflight
> requests are high. It's sad that the linux-aio is in practice
> implemented as a blocking API.
> 
> Host side backtrace will be of more help. Can you get that too?

I guess Linux AIO didn't set the BLK_MQ_REQ_NOWAIT flag so the task went
to sleep when it ran out of blk-mq tags. The easiest solution is to move
to io_uring. Linux AIO is broken - it's not AIO :).

If we know that no other process is writing to the host block device
then maybe we can determine the blk-mq tags limit (the queue depth) and
avoid sending more requests. That way QEMU doesn't block, but I don't
think this approach works when other processes are submitting I/O to the
same host block device :(.
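
For reference, the queue limits a host block device advertises can be
read from sysfs (<dev> below is a placeholder):

	host# cat /sys/block/<dev>/queue/nr_requests
	host# cat /sys/block/<dev>/device/queue_depth    # SCSI devices only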

Fam's original suggestion of invoking io_submit(2) from a worker thread
is an option, but I'm afraid it will slow down the uncontended case.

I'm CCing Glauber in case he battled this in the past in ScyllaDB.

Stefan


* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-12-08 13:10                               ` Stefan Hajnoczi
@ 2020-12-08 13:47                                 ` Glauber Costa
  2020-12-14 16:33                                   ` Stefan Hajnoczi
  0 siblings, 1 reply; 28+ messages in thread
From: Glauber Costa @ 2020-12-08 13:47 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Fam Zheng, Zhenyu Ye, Paolo Bonzini, Kevin Wolf, Zhanghailiang,
	qemu-block, qemu-devel, xiexiangyou, armbru, mreitz,
	Glauber Costa

On Tue, Dec 8, 2020 at 8:11 AM Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Thu, Oct 22, 2020 at 05:29:16PM +0100, Fam Zheng wrote:
> > On Tue, 2020-10-20 at 09:34 +0800, Zhenyu Ye wrote:
> > > On 2020/10/19 21:25, Paolo Bonzini wrote:
> > > > On 19/10/20 14:40, Zhenyu Ye wrote:
> > > > > The kernel backtrace for io_submit in GUEST is:
> > > > >
> > > > >         guest# ./offcputime -K -p `pgrep -nx fio`
> > > > >             b'finish_task_switch'
> > > > >             b'__schedule'
> > > > >             b'schedule'
> > > > >             b'io_schedule'
> > > > >             b'blk_mq_get_tag'
> > > > >             b'blk_mq_get_request'
> > > > >             b'blk_mq_make_request'
> > > > >             b'generic_make_request'
> > > > >             b'submit_bio'
> > > > >             b'blkdev_direct_IO'
> > > > >             b'generic_file_read_iter'
> > > > >             b'aio_read'
> > > > >             b'io_submit_one'
> > > > >             b'__x64_sys_io_submit'
> > > > >             b'do_syscall_64'
> > > > >             b'entry_SYSCALL_64_after_hwframe'
> > > > >             -                fio (1464)
> > > > >                 40031912
> > > > >
> > > > > And Linux io_uring can avoid the latency problem.
> >
> > Thanks for the info. What this tells us is basically the inflight
> > requests are high. It's sad that the linux-aio is in practice
> > implemented as a blocking API.

it is.

> >
> > Host side backtrace will be of more help. Can you get that too?
>
> I guess Linux AIO didn't set the BLK_MQ_REQ_NOWAIT flag so the task went
> to sleep when it ran out of blk-mq tags. The easiest solution is to move
> to io_uring. Linux AIO is broken - it's not AIO :).

Agree!
>
> If we know that no other process is writing to the host block device
> then maybe we can determine the blk-mq tags limit (the queue depth) and
> avoid sending more requests. That way QEMU doesn't block, but I don't
> think this approach works when other processes are submitting I/O to the
> same host block device :(.
>
> Fam's original suggestion of invoking io_submit(2) from a worker thread
> is an option, but I'm afraid it will slow down the uncontended case.
>
> I'm CCing Glauber in case he battled this in the past in ScyllaDB.

We have, and a lot. I don't recall seeing this particular lock, but
XFS would block us all the time
if it had to update metadata to submit the operation, lock inodes, etc.

The work we did at the time was in fixing those things in the kernel
as much as we could.
But the API is just like that...

>
> Stefan


* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-12-08 13:47                                 ` Glauber Costa
@ 2020-12-14 16:33                                   ` Stefan Hajnoczi
  2020-12-21 11:30                                     ` Zhenyu Ye
  0 siblings, 1 reply; 28+ messages in thread
From: Stefan Hajnoczi @ 2020-12-14 16:33 UTC (permalink / raw)
  To: Glauber Costa
  Cc: Fam Zheng, Kevin Wolf, Zhanghailiang, qemu-block, Zhenyu Ye,
	qemu-devel, xiexiangyou, armbru, Glauber Costa, Paolo Bonzini,
	mreitz

On Tue, Dec 08, 2020 at 08:47:42AM -0500, Glauber Costa wrote:
> The work we did at the time was in fixing those things in the kernel
> as much as we could.
> But the API is just like that...

Thanks!

Stefan


* Re: [PATCH v1 0/2] Add timeout mechanism to qmp actions
  2020-12-14 16:33                                   ` Stefan Hajnoczi
@ 2020-12-21 11:30                                     ` Zhenyu Ye
  0 siblings, 0 replies; 28+ messages in thread
From: Zhenyu Ye @ 2020-12-21 11:30 UTC (permalink / raw)
  To: Stefan Hajnoczi, Glauber Costa
  Cc: Fam Zheng, Kevin Wolf, Zhanghailiang, qemu-block, qemu-devel,
	xiexiangyou, armbru, Glauber Costa, Paolo Bonzini, mreitz

On 2020/12/15 0:33, Stefan Hajnoczi wrote:
> On Tue, Dec 08, 2020 at 08:47:42AM -0500, Glauber Costa wrote:
>> The work we did at the time was in fixing those things in the kernel
>> as much as we could.
>> But the API is just like that...
> 

The best way forward for us is to replace io_submit with io_uring.

Thanks for the discussion!

Zhenyu




Thread overview: 28+ messages
2020-08-10 14:52 [PATCH v1 0/2] Add timeout mechanism to qmp actions Zhenyu Ye
2020-08-10 14:52 ` [PATCH v1 1/2] util: introduce aio_context_acquire_timeout Zhenyu Ye
2020-08-10 14:52 ` [PATCH v1 2/2] qmp: use aio_context_acquire_timeout replace aio_context_acquire Zhenyu Ye
2020-08-10 15:38 ` [PATCH v1 0/2] Add timeout mechanism to qmp actions Kevin Wolf
2020-08-11 13:54   ` Zhenyu Ye
2020-08-21 12:52     ` Stefan Hajnoczi
2020-09-14 13:27     ` Stefan Hajnoczi
2020-09-17  7:36       ` Zhenyu Ye
2020-09-17 10:10         ` Fam Zheng
2020-09-17 15:44         ` Stefan Hajnoczi
2020-09-17 16:01           ` Fam Zheng
2020-09-18 11:23             ` Zhenyu Ye
2020-09-18 14:06               ` Fam Zheng
2020-09-19  2:22                 ` Zhenyu Ye
2020-09-21 11:14                   ` Fam Zheng
2020-10-13 10:00                     ` Stefan Hajnoczi
2020-10-19 12:40                       ` Zhenyu Ye
2020-10-19 13:25                         ` Paolo Bonzini
2020-10-20  1:34                           ` Zhenyu Ye
2020-10-22 16:29                             ` Fam Zheng
2020-12-08 13:10                               ` Stefan Hajnoczi
2020-12-08 13:47                                 ` Glauber Costa
2020-12-14 16:33                                   ` Stefan Hajnoczi
2020-12-21 11:30                                     ` Zhenyu Ye
2020-09-14 14:42     ` Daniel P. Berrangé
2020-09-17  8:12       ` Zhenyu Ye
2020-08-12 13:51 ` Stefan Hajnoczi
2020-08-13  1:51   ` Zhenyu Ye
