[PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
@ 2020-05-11 11:14 Lukas Straub
  2020-05-11 11:14 ` [PATCH 1/5] Introduce yank feature Lukas Straub
                   ` (6 more replies)
  0 siblings, 7 replies; 52+ messages in thread
From: Lukas Straub @ 2020-05-11 11:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, Dr. David Alan Gilbert, Max Reitz,
	Paolo Bonzini, Marc-André Lureau

[-- Attachment #1: Type: text/plain, Size: 1614 bytes --]

Hello Everyone,
In many cases, if qemu has a network connection (qmp, migration, chardev, etc.)
to some other server and that server dies or hangs, qemu hangs too.
These patches introduce the new 'yank' out-of-band qmp command to recover from
these kinds of hangs. The different subsystems register callbacks which get
executed with the yank command. For example the callback can shutdown() a
socket. This is intended for the colo use-case, but it can be used for other
things too of course.

Regards,
Lukas Straub

Lukas Straub (5):
  Introduce yank feature
  io/channel.c,io/channel-socket.c: Add yank feature
  block/nbd.c: Add yank feature
  chardev/char-socket.c: Add yank feature
  migration: Add yank feature

 Makefile.objs               |  2 +
 block/nbd.c                 | 68 ++++++++++++++++++++++++---------
 chardev/char-socket.c       |  8 ++++
 chardev/char.c              |  3 ++
 include/io/channel-socket.h |  1 +
 include/io/channel.h        | 12 ++++++
 io/channel-socket.c         | 29 ++++++++++++++
 io/channel.c                |  9 +++++
 migration/channel.c         |  2 +
 migration/migration.c       | 11 ++++++
 qapi/block-core.json        |  5 ++-
 qapi/char.json              |  5 ++-
 qapi/migration.json         | 17 +++++++--
 qapi/misc.json              | 15 ++++++++
 softmmu/vl.c                |  2 +
 yank.c                      | 75 +++++++++++++++++++++++++++++++++++++
 yank.h                      | 12 ++++++
 17 files changed, 254 insertions(+), 22 deletions(-)
 create mode 100644 yank.c
 create mode 100644 yank.h

-- 
2.20.1

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [PATCH 1/5] Introduce yank feature
  2020-05-11 11:14 [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Lukas Straub
@ 2020-05-11 11:14 ` Lukas Straub
  2020-05-11 11:14 ` [PATCH 2/5] io/channel.c,io/channel-socket.c: Add " Lukas Straub
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 52+ messages in thread
From: Lukas Straub @ 2020-05-11 11:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, Dr. David Alan Gilbert, Max Reitz,
	Paolo Bonzini, Marc-André Lureau

[-- Attachment #1: Type: text/plain, Size: 3846 bytes --]

The yank feature allows to recover from hanging qemu by "yanking"
at various parts. Other qemu systems can register yank functions
which will be called by the 'yank' out-of-band qmp command.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 qapi/misc.json | 15 ++++++++++
 softmmu/vl.c   |  2 ++
 yank.c         | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++
 yank.h         | 12 ++++++++
 4 files changed, 104 insertions(+)
 create mode 100644 yank.c
 create mode 100644 yank.h

diff --git a/qapi/misc.json b/qapi/misc.json
index 99b90ac80b..de1ee494ae 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -1550,3 +1550,18 @@
 ##
 { 'command': 'query-vm-generation-id', 'returns': 'GuidInfo' }
 
+##
+# @yank:
+#
+# Recover from hanging qemu by calling yank functions.
+#
+# Returns: nothing.
+#
+# Example:
+#
+# -> { "execute": "yank" }
+# <- { "return": {} }
+#
+# Since: 5.1
+##
+{ 'command': 'yank', 'allow-oob': true }
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 32c0047889..5d99749d29 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -112,6 +112,7 @@
 #include "qapi/qmp/qerror.h"
 #include "sysemu/iothread.h"
 #include "qemu/guest-random.h"
+#include "yank.h"
 
 #define MAX_VIRTIO_CONSOLES 1
 
@@ -2906,6 +2907,7 @@ void qemu_init(int argc, char **argv, char **envp)
     precopy_infrastructure_init();
     postcopy_infrastructure_init();
     monitor_init_globals();
+    yank_init();
 
     if (qcrypto_init(&err) < 0) {
         error_reportf_err(err, "cannot initialize crypto: ");
diff --git a/yank.c b/yank.c
new file mode 100644
index 0000000000..cefbfd8ab5
--- /dev/null
+++ b/yank.c
@@ -0,0 +1,75 @@
+/*
+ * QEMU yank feature
+ *
+ * Copyright (c) Lukas Straub <lukasstraub2@web.de>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/thread.h"
+#include "qemu/queue.h"
+#include "yank.h"
+
+struct YankFuncAndParam {
+    YankFn *func;
+    void *opaque;
+    QLIST_ENTRY(YankFuncAndParam) next;
+};
+
+static QemuMutex lock;
+static QLIST_HEAD(qlisthead, YankFuncAndParam) head
+    = QLIST_HEAD_INITIALIZER(head);
+
+void yank_register_function(YankFn *func, void *opaque)
+{
+    struct YankFuncAndParam *tmp = g_malloc(sizeof(struct YankFuncAndParam));
+    tmp->func = func;
+    tmp->opaque = opaque;
+
+    qemu_mutex_lock(&lock);
+    QLIST_INSERT_HEAD(&head, tmp, next);
+    qemu_mutex_unlock(&lock);
+}
+
+void yank_unregister_function(YankFn *func, void *opaque)
+{
+    qemu_mutex_lock(&lock);
+
+    struct YankFuncAndParam *tmp;
+    QLIST_FOREACH(tmp, &head, next) {
+        if (tmp->func == func && tmp->opaque == opaque) {
+            QLIST_REMOVE(tmp, next);
+            g_free(tmp);
+            qemu_mutex_unlock(&lock);
+            return;
+        }
+    }
+
+    abort();
+}
+
+void yank_call_functions(void)
+{
+    qemu_mutex_lock(&lock);
+
+    struct YankFuncAndParam *tmp;
+    QLIST_FOREACH(tmp, &head, next) {
+        tmp->func(tmp->opaque);
+    }
+
+    qemu_mutex_unlock(&lock);
+}
+
+void qmp_yank(Error **errp)
+{
+    yank_call_functions();
+}
+
+void yank_init(void)
+{
+    qemu_mutex_init(&lock);
+    QLIST_INIT(&head);
+}
diff --git a/yank.h b/yank.h
new file mode 100644
index 0000000000..7376224219
--- /dev/null
+++ b/yank.h
@@ -0,0 +1,12 @@
+
+#ifndef YANK_H
+#define YANK_H
+
+typedef void (YankFn) (void *opaque);
+
+void yank_register_function(YankFn *func, void *opaque);
+void yank_unregister_function(YankFn *func, void *opaque);
+void yank_call_functions(void);
+void yank_init(void);
+void qmp_yank(Error **errp);
+#endif
-- 
2.20.1


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 2/5] io/channel.c,io/channel-socket.c: Add yank feature
  2020-05-11 11:14 [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Lukas Straub
  2020-05-11 11:14 ` [PATCH 1/5] Introduce yank feature Lukas Straub
@ 2020-05-11 11:14 ` Lukas Straub
  2020-05-11 11:51   ` Daniel P. Berrangé
  2020-05-11 11:14 ` [PATCH 3/5] block/nbd.c: " Lukas Straub
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 52+ messages in thread
From: Lukas Straub @ 2020-05-11 11:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, Dr. David Alan Gilbert, Max Reitz,
	Paolo Bonzini, Marc-André Lureau

[-- Attachment #1: Type: text/plain, Size: 5027 bytes --]

Add qio_channel_set_yank function to channel and to channel-socket,
which will register a yank function. The yank function calls
shutdown() on the socket.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 Makefile.objs               |  1 +
 include/io/channel-socket.h |  1 +
 include/io/channel.h        | 12 ++++++++++++
 io/channel-socket.c         | 29 +++++++++++++++++++++++++++++
 io/channel.c                |  9 +++++++++
 5 files changed, 52 insertions(+)

diff --git a/Makefile.objs b/Makefile.objs
index a7c967633a..889115775c 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -24,6 +24,7 @@ block-obj-m = block/
 crypto-obj-y = crypto/
 
 io-obj-y = io/
+io-obj-y += yank.o
 
 endif # CONFIG_SOFTMMU or CONFIG_TOOLS
 
diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
index 777ff5954e..0fa7a364f3 100644
--- a/include/io/channel-socket.h
+++ b/include/io/channel-socket.h
@@ -48,6 +48,7 @@ struct QIOChannelSocket {
     socklen_t localAddrLen;
     struct sockaddr_storage remoteAddr;
     socklen_t remoteAddrLen;
+    bool yank;
 };
 
 
diff --git a/include/io/channel.h b/include/io/channel.h
index d4557f0930..782b618694 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -132,6 +132,8 @@ struct QIOChannelClass {
                         bool enabled);
     void (*io_set_delay)(QIOChannel *ioc,
                          bool enabled);
+    void (*io_set_yank)(QIOChannel *ioc,
+                        bool enabled);
     off_t (*io_seek)(QIOChannel *ioc,
                      off_t offset,
                      int whence,
@@ -550,6 +552,16 @@ int qio_channel_shutdown(QIOChannel *ioc,
 void qio_channel_set_delay(QIOChannel *ioc,
                            bool enabled);
 
+/**
+ * qio_channel_set_yank:
+ * @ioc: the channel object
+ * @enabled: the new flag state
+ *
+ * Controls wether this channel participates in yanking.
+ */
+void qio_channel_set_yank(QIOChannel *ioc,
+                          bool enabled);
+
 /**
  * qio_channel_set_cork:
  * @ioc: the channel object
diff --git a/io/channel-socket.c b/io/channel-socket.c
index b74f5b92a0..be03946d29 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -26,6 +26,7 @@
 #include "io/channel-watch.h"
 #include "trace.h"
 #include "qapi/clone-visitor.h"
+#include "yank.h"
 
 #define SOCKET_MAX_FDS 16
 
@@ -55,6 +56,7 @@ qio_channel_socket_new(void)
 
     sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
     sioc->fd = -1;
+    sioc->yank = 0;
 
     ioc = QIO_CHANNEL(sioc);
     qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
@@ -395,10 +397,19 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
     return NULL;
 }
 
+static void qio_channel_socket_yank(void *opaque)
+{
+    QIOChannel *ioc = opaque;
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+
+    shutdown(sioc->fd, SHUT_RDWR);
+}
+
 static void qio_channel_socket_init(Object *obj)
 {
     QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj);
     ioc->fd = -1;
+    ioc->yank = 0;
 }
 
 static void qio_channel_socket_finalize(Object *obj)
@@ -422,6 +433,9 @@ static void qio_channel_socket_finalize(Object *obj)
         closesocket(ioc->fd);
         ioc->fd = -1;
     }
+    if (ioc->yank) {
+        yank_unregister_function(qio_channel_socket_yank, ioc);
+    }
 }
 
 
@@ -686,6 +700,20 @@ qio_channel_socket_set_delay(QIOChannel *ioc,
                     &v, sizeof(v));
 }
 
+static void
+qio_channel_socket_set_yank(QIOChannel *ioc,
+                            bool enabled)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+
+    if (sioc->yank) {
+        yank_unregister_function(qio_channel_socket_yank, ioc);
+    }
+    sioc->yank = enabled;
+    if (sioc->yank) {
+        yank_register_function(qio_channel_socket_yank, ioc);
+    }
+}
 
 static void
 qio_channel_socket_set_cork(QIOChannel *ioc,
@@ -784,6 +812,7 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
     ioc_klass->io_shutdown = qio_channel_socket_shutdown;
     ioc_klass->io_set_cork = qio_channel_socket_set_cork;
     ioc_klass->io_set_delay = qio_channel_socket_set_delay;
+    ioc_klass->io_set_yank = qio_channel_socket_set_yank;
     ioc_klass->io_create_watch = qio_channel_socket_create_watch;
     ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler;
 }
diff --git a/io/channel.c b/io/channel.c
index e4376eb0bc..0c4095e0e0 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -373,6 +373,15 @@ void qio_channel_set_delay(QIOChannel *ioc,
     }
 }
 
+void qio_channel_set_yank(QIOChannel *ioc,
+                          bool enabled)
+{
+    QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+
+    if (klass->io_set_yank) {
+        klass->io_set_yank(ioc, enabled);
+    }
+}
 
 void qio_channel_set_cork(QIOChannel *ioc,
                           bool enabled)
-- 
2.20.1


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 3/5] block/nbd.c: Add yank feature
  2020-05-11 11:14 [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Lukas Straub
  2020-05-11 11:14 ` [PATCH 1/5] Introduce yank feature Lukas Straub
  2020-05-11 11:14 ` [PATCH 2/5] io/channel.c,io/channel-socket.c: Add " Lukas Straub
@ 2020-05-11 11:14 ` Lukas Straub
  2020-05-11 16:19   ` Dr. David Alan Gilbert
  2020-05-11 11:14 ` [PATCH 4/5] chardev/char-socket.c: " Lukas Straub
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 52+ messages in thread
From: Lukas Straub @ 2020-05-11 11:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, Dr. David Alan Gilbert, Max Reitz,
	Paolo Bonzini, Marc-André Lureau

[-- Attachment #1: Type: text/plain, Size: 10604 bytes --]

Add yank option, pass it to the socket-channel and register a yank
function which sets s->state = NBD_CLIENT_QUIT. This is the same
behaviour as if an error occured.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 Makefile.objs        |  1 +
 block/nbd.c          | 68 +++++++++++++++++++++++++++++++++-----------
 qapi/block-core.json |  5 +++-
 3 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/Makefile.objs b/Makefile.objs
index 889115775c..4b213b3e78 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -18,6 +18,7 @@ block-obj-y += block.o blockjob.o job.o
 block-obj-y += block/ scsi/
 block-obj-y += qemu-io-cmds.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
+block-obj-y += yank.o
 
 block-obj-m = block/
 
diff --git a/block/nbd.c b/block/nbd.c
index 2160859f64..3c0fd3abb8 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -35,6 +35,7 @@
 #include "qemu/option.h"
 #include "qemu/cutils.h"
 #include "qemu/main-loop.h"
+#include "qemu/atomic.h"
 
 #include "qapi/qapi-visit-sockets.h"
 #include "qapi/qmp/qstring.h"
@@ -43,6 +44,8 @@
 #include "block/nbd.h"
 #include "block/block_int.h"
 
+#include "yank.h"
+
 #define EN_OPTSTR ":exportname="
 #define MAX_NBD_REQUESTS    16
 
@@ -91,6 +94,7 @@ typedef struct BDRVNBDState {
     QCryptoTLSCreds *tlscreds;
     const char *hostname;
     char *x_dirty_bitmap;
+    bool yank;
 } BDRVNBDState;
 
 static int nbd_client_connect(BlockDriverState *bs, Error **errp);
@@ -111,12 +115,12 @@ static void nbd_clear_bdrvstate(BDRVNBDState *s)
 static void nbd_channel_error(BDRVNBDState *s, int ret)
 {
     if (ret == -EIO) {
-        if (s->state == NBD_CLIENT_CONNECTED) {
+        if (atomic_read(&s->state) == NBD_CLIENT_CONNECTED) {
             s->state = s->reconnect_delay ? NBD_CLIENT_CONNECTING_WAIT :
                                             NBD_CLIENT_CONNECTING_NOWAIT;
         }
     } else {
-        if (s->state == NBD_CLIENT_CONNECTED) {
+        if (atomic_read(&s->state) == NBD_CLIENT_CONNECTED) {
             qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
         }
         s->state = NBD_CLIENT_QUIT;
@@ -167,7 +171,7 @@ static void nbd_client_attach_aio_context(BlockDriverState *bs,
      * s->connection_co is either yielded from nbd_receive_reply or from
      * nbd_co_reconnect_loop()
      */
-    if (s->state == NBD_CLIENT_CONNECTED) {
+    if (atomic_read(&s->state) == NBD_CLIENT_CONNECTED) {
         qio_channel_attach_aio_context(QIO_CHANNEL(s->ioc), new_context);
     }
 
@@ -206,7 +210,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 
-    if (s->state == NBD_CLIENT_CONNECTED) {
+    if (atomic_read(&s->state) == NBD_CLIENT_CONNECTED) {
         /* finish any pending coroutines */
         assert(s->ioc);
         qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
@@ -230,13 +234,14 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 
 static bool nbd_client_connecting(BDRVNBDState *s)
 {
-    return s->state == NBD_CLIENT_CONNECTING_WAIT ||
-        s->state == NBD_CLIENT_CONNECTING_NOWAIT;
+    NBDClientState state = atomic_read(&s->state);
+    return state == NBD_CLIENT_CONNECTING_WAIT ||
+        state == NBD_CLIENT_CONNECTING_NOWAIT;
 }
 
 static bool nbd_client_connecting_wait(BDRVNBDState *s)
 {
-    return s->state == NBD_CLIENT_CONNECTING_WAIT;
+    return atomic_read(&s->state) == NBD_CLIENT_CONNECTING_WAIT;
 }
 
 static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
@@ -305,7 +310,7 @@ static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s)
     nbd_reconnect_attempt(s);
 
     while (nbd_client_connecting(s)) {
-        if (s->state == NBD_CLIENT_CONNECTING_WAIT &&
+        if (atomic_read(&s->state) == NBD_CLIENT_CONNECTING_WAIT &&
             qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time_ns > delay_ns)
         {
             s->state = NBD_CLIENT_CONNECTING_NOWAIT;
@@ -341,7 +346,7 @@ static coroutine_fn void nbd_connection_entry(void *opaque)
     int ret = 0;
     Error *local_err = NULL;
 
-    while (s->state != NBD_CLIENT_QUIT) {
+    while (atomic_read(&s->state) != NBD_CLIENT_QUIT) {
         /*
          * The NBD client can only really be considered idle when it has
          * yielded from qio_channel_readv_all_eof(), waiting for data. This is
@@ -356,7 +361,7 @@ static coroutine_fn void nbd_connection_entry(void *opaque)
             nbd_co_reconnect_loop(s);
         }
 
-        if (s->state != NBD_CLIENT_CONNECTED) {
+        if (atomic_read(&s->state) != NBD_CLIENT_CONNECTED) {
             continue;
         }
 
@@ -435,7 +440,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
         qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
     }
 
-    if (s->state != NBD_CLIENT_CONNECTED) {
+    if (atomic_read(&s->state) != NBD_CLIENT_CONNECTED) {
         rc = -EIO;
         goto err;
     }
@@ -462,7 +467,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
     if (qiov) {
         qio_channel_set_cork(s->ioc, true);
         rc = nbd_send_request(s->ioc, request);
-        if (rc >= 0 && s->state == NBD_CLIENT_CONNECTED) {
+        if (rc >= 0 && atomic_read(&s->state) == NBD_CLIENT_CONNECTED) {
             if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
                                        NULL) < 0) {
                 rc = -EIO;
@@ -777,7 +782,7 @@ static coroutine_fn int nbd_co_do_receive_one_chunk(
     s->requests[i].receiving = true;
     qemu_coroutine_yield();
     s->requests[i].receiving = false;
-    if (s->state != NBD_CLIENT_CONNECTED) {
+    if (atomic_read(&s->state) != NBD_CLIENT_CONNECTED) {
         error_setg(errp, "Connection closed");
         return -EIO;
     }
@@ -936,7 +941,7 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s,
     NBDReply local_reply;
     NBDStructuredReplyChunk *chunk;
     Error *local_err = NULL;
-    if (s->state != NBD_CLIENT_CONNECTED) {
+    if (atomic_read(&s->state) != NBD_CLIENT_CONNECTED) {
         error_setg(&local_err, "Connection closed");
         nbd_iter_channel_error(iter, -EIO, &local_err);
         goto break_loop;
@@ -961,7 +966,8 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s,
     }
 
     /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */
-    if (nbd_reply_is_simple(reply) || s->state != NBD_CLIENT_CONNECTED) {
+    if (nbd_reply_is_simple(reply) ||
+        atomic_read(&s->state) != NBD_CLIENT_CONNECTED) {
         goto break_loop;
     }
 
@@ -1395,6 +1401,14 @@ static int nbd_client_reopen_prepare(BDRVReopenState *state,
     return 0;
 }
 
+static void nbd_yank(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+    atomic_set(&s->state, NBD_CLIENT_QUIT);
+}
+
 static void nbd_client_close(BlockDriverState *bs)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
@@ -1407,14 +1421,17 @@ static void nbd_client_close(BlockDriverState *bs)
     nbd_teardown_connection(bs);
 }
 
-static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
+static QIOChannelSocket *nbd_establish_connection(BlockDriverState *bs,
+                                                  SocketAddress *saddr,
                                                   Error **errp)
 {
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     QIOChannelSocket *sioc;
     Error *local_err = NULL;
 
     sioc = qio_channel_socket_new();
     qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");
+    qio_channel_set_yank(QIO_CHANNEL(sioc), s->yank);
 
     qio_channel_socket_connect_sync(sioc, saddr, &local_err);
     if (local_err) {
@@ -1438,7 +1455,7 @@ static int nbd_client_connect(BlockDriverState *bs, Error **errp)
      * establish TCP connection, return error if it fails
      * TODO: Configurable retry-until-timeout behaviour.
      */
-    QIOChannelSocket *sioc = nbd_establish_connection(s->saddr, errp);
+    QIOChannelSocket *sioc = nbd_establish_connection(bs, s->saddr, errp);
 
     if (!sioc) {
         return -ECONNREFUSED;
@@ -1829,6 +1846,12 @@ static QemuOptsList nbd_runtime_opts = {
                     "future requests before a successful reconnect will "
                     "immediately fail. Default 0",
         },
+        {
+            .name = "yank",
+            .type = QEMU_OPT_BOOL,
+            .help = "Forcibly close the connection and don't attempt to "
+                    "reconnect when the 'yank' qmp command is executed.",
+        },
         { /* end of list */ }
     },
 };
@@ -1888,6 +1911,8 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options,
 
     s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0);
 
+    s->yank = qemu_opt_get_bool(opts, "yank", false);
+
     ret = 0;
 
  error:
@@ -1921,6 +1946,10 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
     /* successfully connected */
     s->state = NBD_CLIENT_CONNECTED;
 
+    if (s->yank) {
+        yank_register_function(nbd_yank, bs);
+    }
+
     s->connection_co = qemu_coroutine_create(nbd_connection_entry, s);
     bdrv_inc_in_flight(bs);
     aio_co_schedule(bdrv_get_aio_context(bs), s->connection_co);
@@ -1972,6 +2001,11 @@ static void nbd_close(BlockDriverState *bs)
     BDRVNBDState *s = bs->opaque;
 
     nbd_client_close(bs);
+
+    if (s->yank) {
+        yank_unregister_function(nbd_yank, bs);
+    }
+
     nbd_clear_bdrvstate(s);
 }
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 943df1926a..1c1578160e 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3862,6 +3862,8 @@
 #                   reconnect. After that time, any delayed requests and all
 #                   future requests before a successful reconnect will
 #                   immediately fail. Default 0 (Since 4.2)
+# @yank: Forcibly close the connection and don't attempt to reconnect when
+#        the 'yank' qmp command is executed. (Since: 5.1)
 #
 # Since: 2.9
 ##
@@ -3870,7 +3872,8 @@
             '*export': 'str',
             '*tls-creds': 'str',
             '*x-dirty-bitmap': 'str',
-            '*reconnect-delay': 'uint32' } }
+            '*reconnect-delay': 'uint32',
+	    'yank': 'bool' } }
 
 ##
 # @BlockdevOptionsRaw:
-- 
2.20.1


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 4/5] chardev/char-socket.c: Add yank feature
  2020-05-11 11:14 [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Lukas Straub
                   ` (2 preceding siblings ...)
  2020-05-11 11:14 ` [PATCH 3/5] block/nbd.c: " Lukas Straub
@ 2020-05-11 11:14 ` Lukas Straub
  2020-05-12  8:56   ` Daniel P. Berrangé
  2020-05-11 11:14 ` [PATCH 5/5] migration: " Lukas Straub
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 52+ messages in thread
From: Lukas Straub @ 2020-05-11 11:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, Dr. David Alan Gilbert, Max Reitz,
	Paolo Bonzini, Marc-André Lureau

[-- Attachment #1: Type: text/plain, Size: 3459 bytes --]

Add yank option which is passed to the socket-channel.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 chardev/char-socket.c | 8 ++++++++
 chardev/char.c        | 3 +++
 qapi/char.json        | 5 ++++-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 185fe38dda..e476358941 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -65,6 +65,7 @@ typedef struct {
     int max_size;
     int do_telnetopt;
     int do_nodelay;
+    int do_yank;
     int *read_msgfds;
     size_t read_msgfds_num;
     int *write_msgfds;
@@ -877,6 +878,9 @@ static int tcp_chr_new_client(Chardev *chr, QIOChannelSocket *sioc)
     if (s->do_nodelay) {
         qio_channel_set_delay(s->ioc, false);
     }
+    if (s->do_yank) {
+        qio_channel_set_yank(s->ioc, true);
+    }
     if (s->listener) {
         qio_net_listener_set_client_func_full(s->listener, NULL, NULL,
                                               NULL, chr->gcontext);
@@ -1297,6 +1301,7 @@ static void qmp_chardev_open_socket(Chardev *chr,
     SocketChardev *s = SOCKET_CHARDEV(chr);
     ChardevSocket *sock = backend->u.socket.data;
     bool do_nodelay     = sock->has_nodelay ? sock->nodelay : false;
+    bool do_yank        = sock->has_yank    ? sock->yank    : false;
     bool is_listen      = sock->has_server  ? sock->server  : true;
     bool is_telnet      = sock->has_telnet  ? sock->telnet  : false;
     bool is_tn3270      = sock->has_tn3270  ? sock->tn3270  : false;
@@ -1310,6 +1315,7 @@ static void qmp_chardev_open_socket(Chardev *chr,
     s->is_tn3270 = is_tn3270;
     s->is_websock = is_websock;
     s->do_nodelay = do_nodelay;
+    s->do_yank = do_yank;
     if (sock->tls_creds) {
         Object *creds;
         creds = object_resolve_path_component(
@@ -1400,6 +1406,8 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend,
 
     sock->has_nodelay = qemu_opt_get(opts, "delay");
     sock->nodelay = !qemu_opt_get_bool(opts, "delay", true);
+    sock->has_yank = qemu_opt_get(opts, "yank");
+    sock->yank = qemu_opt_get_bool(opts, "yank", false);
     /*
      * We have different default to QMP for 'server', hence
      * we can't just check for existence of 'server'
diff --git a/chardev/char.c b/chardev/char.c
index e77564060d..04075389bf 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -939,6 +939,9 @@ QemuOptsList qemu_chardev_opts = {
         },{
             .name = "logappend",
             .type = QEMU_OPT_BOOL,
+        },{
+            .name = "yank",
+            .type = QEMU_OPT_BOOL,
         },
         { /* end of list */ }
     },
diff --git a/qapi/char.json b/qapi/char.json
index daceb20f84..f9c04e720c 100644
--- a/qapi/char.json
+++ b/qapi/char.json
@@ -270,6 +270,8 @@
 #             then attempt a reconnect after the given number of seconds.
 #             Setting this to zero disables this function. (default: 0)
 #             (Since: 2.2)
+# @yank: Shutdown the socket when the 'yank' qmp command is executed.
+#        (Since: 5.1)
 #
 # Since: 1.4
 ##
@@ -283,7 +285,8 @@
             '*telnet': 'bool',
             '*tn3270': 'bool',
             '*websocket': 'bool',
-            '*reconnect': 'int' },
+            '*reconnect': 'int',
+            '*yank': 'bool' },
   'base': 'ChardevCommon' }
 
 ##
-- 
2.20.1


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 5/5] migration: Add yank feature
  2020-05-11 11:14 [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Lukas Straub
                   ` (3 preceding siblings ...)
  2020-05-11 11:14 ` [PATCH 4/5] chardev/char-socket.c: " Lukas Straub
@ 2020-05-11 11:14 ` Lukas Straub
  2020-05-11 11:49 ` [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Daniel P. Berrangé
  2020-05-13 19:12 ` Lukas Straub
  6 siblings, 0 replies; 52+ messages in thread
From: Lukas Straub @ 2020-05-11 11:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, Dr. David Alan Gilbert, Max Reitz,
	Paolo Bonzini, Marc-André Lureau

[-- Attachment #1: Type: text/plain, Size: 5431 bytes --]

Add yank option which is passed to the socket-channel.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 migration/channel.c   |  2 ++
 migration/migration.c | 11 +++++++++++
 qapi/migration.json   | 17 ++++++++++++++---
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/migration/channel.c b/migration/channel.c
index 20e4c8e2dc..498af99104 100644
--- a/migration/channel.c
+++ b/migration/channel.c
@@ -35,6 +35,7 @@ void migration_channel_process_incoming(QIOChannel *ioc)
     trace_migration_set_incoming_channel(
         ioc, object_get_typename(OBJECT(ioc)));
 
+    qio_channel_set_yank(ioc, s->parameters.yank);
     if (s->parameters.tls_creds &&
         *s->parameters.tls_creds &&
         !object_dynamic_cast(OBJECT(ioc),
@@ -67,6 +68,7 @@ void migration_channel_connect(MigrationState *s,
         ioc, object_get_typename(OBJECT(ioc)), hostname, error);
 
     if (!error) {
+        qio_channel_set_yank(ioc, s->parameters.yank);
         if (s->parameters.tls_creds &&
             *s->parameters.tls_creds &&
             !object_dynamic_cast(OBJECT(ioc),
diff --git a/migration/migration.c b/migration/migration.c
index 187ac0410c..b6f2f82dfb 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -814,6 +814,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
     params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
     params->has_max_cpu_throttle = true;
     params->max_cpu_throttle = s->parameters.max_cpu_throttle;
+    params->has_yank = true;
+    params->yank = s->parameters.yank;
     params->has_announce_initial = true;
     params->announce_initial = s->parameters.announce_initial;
     params->has_announce_max = true;
@@ -1364,6 +1366,9 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
     if (params->has_max_cpu_throttle) {
         dest->max_cpu_throttle = params->max_cpu_throttle;
     }
+    if (params->has_yank) {
+        dest->yank = params->yank;
+    }
     if (params->has_announce_initial) {
         dest->announce_initial = params->announce_initial;
     }
@@ -1472,6 +1477,9 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
     if (params->has_max_cpu_throttle) {
         s->parameters.max_cpu_throttle = params->max_cpu_throttle;
     }
+    if (params->has_yank) {
+        s->parameters.yank = params->yank;
+    }
     if (params->has_announce_initial) {
         s->parameters.announce_initial = params->announce_initial;
     }
@@ -3623,6 +3631,8 @@ static Property migration_properties[] = {
     DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
                       parameters.max_cpu_throttle,
                       DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
+    DEFINE_PROP_BOOL("yank", MigrationState,
+                      parameters.yank, false),
     DEFINE_PROP_SIZE("announce-initial", MigrationState,
                       parameters.announce_initial,
                       DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
@@ -3711,6 +3721,7 @@ static void migration_instance_init(Object *obj)
     params->has_xbzrle_cache_size = true;
     params->has_max_postcopy_bandwidth = true;
     params->has_max_cpu_throttle = true;
+    params->has_yank = true;
     params->has_announce_initial = true;
     params->has_announce_max = true;
     params->has_announce_rounds = true;
diff --git a/qapi/migration.json b/qapi/migration.json
index eca2981d0a..ad9e431a8f 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -623,6 +623,9 @@
 #          will consume more CPU.
 #          Defaults to 1. (Since 5.0)
 #
+# @yank: Shutdown the migration socket when the 'yank' qmp command is
+#        executed. (Since: 5.1)
+#
 # Since: 2.4
 ##
 { 'enum': 'MigrationParameter',
@@ -636,7 +639,7 @@
            'multifd-channels',
            'xbzrle-cache-size', 'max-postcopy-bandwidth',
            'max-cpu-throttle', 'multifd-compression',
-           'multifd-zlib-level' ,'multifd-zstd-level' ] }
+           'multifd-zlib-level' ,'multifd-zstd-level', 'yank' ] }
 
 ##
 # @MigrateSetParameters:
@@ -747,6 +750,9 @@
 #          will consume more CPU.
 #          Defaults to 1. (Since 5.0)
 #
+# @yank: Shutdown the migration socket when the 'yank' qmp command is
+#        executed. (Since: 5.1)
+#
 # Since: 2.4
 ##
 # TODO either fuse back into MigrationParameters, or make
@@ -776,7 +782,8 @@
             '*max-cpu-throttle': 'int',
             '*multifd-compression': 'MultiFDCompression',
             '*multifd-zlib-level': 'int',
-            '*multifd-zstd-level': 'int' } }
+            '*multifd-zstd-level': 'int',
+            '*yank': 'bool'} }
 
 ##
 # @migrate-set-parameters:
@@ -907,6 +914,9 @@
 #          will consume more CPU.
 #          Defaults to 1. (Since 5.0)
 #
+# @yank: Shutdown the migration socket when the 'yank' qmp command is
+#        executed. (Since: 5.1)
+#
 # Since: 2.4
 ##
 { 'struct': 'MigrationParameters',
@@ -934,7 +944,8 @@
             '*max-cpu-throttle': 'uint8',
             '*multifd-compression': 'MultiFDCompression',
             '*multifd-zlib-level': 'uint8',
-            '*multifd-zstd-level': 'uint8' } }
+            '*multifd-zstd-level': 'uint8',
+            '*yank': 'bool'} }
 
 ##
 # @query-migrate-parameters:
-- 
2.20.1

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-11 11:14 [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Lukas Straub
                   ` (4 preceding siblings ...)
  2020-05-11 11:14 ` [PATCH 5/5] migration: " Lukas Straub
@ 2020-05-11 11:49 ` Daniel P. Berrangé
  2020-05-11 12:07   ` Dr. David Alan Gilbert
  2020-05-11 18:12   ` Lukas Straub
  2020-05-13 19:12 ` Lukas Straub
  6 siblings, 2 replies; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-11 11:49 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Paolo Bonzini, Marc-André Lureau, Dr. David Alan Gilbert

On Mon, May 11, 2020 at 01:14:34PM +0200, Lukas Straub wrote:
> Hello Everyone,
> In many cases, if qemu has a network connection (qmp, migration, chardev, etc.)
> to some other server and that server dies or hangs, qemu hangs too.

If qemu as a whole hangs due to a stalled network connection, that is a
bug in QEMU that we should be fixing IMHO. QEMU should be doing non-blocking
I/O in general, such that if the network connection or remote server stalls,
we simply stop sending I/O - we shouldn't ever hang the QEMU process or main
loop.

There are places in QEMU code which are not well behaved in this respect,
but many are, and others are getting fixed where found to be important.

Arguably any place in QEMU code which can result in a hang of QEMU in the
event of a stalled network should be considered a security flaw, because
the network is untrusted in general.

> These patches introduce the new 'yank' out-of-band qmp command to recover from
> these kinds of hangs. The different subsystems register callbacks which get
> executed with the yank command. For example the callback can shutdown() a
> socket. This is intended for the colo use-case, but it can be used for other
> things too of course.

IIUC, invoking the "yank" command unconditionally kills every single
network connection in QEMU that has registered with the "yank" subsystem.
IMHO this is way too big of a hammer, even if we accept there are bugs in
QEMU not handling stalled networking well.

eg if a chardev hangs QEMU, and we tear down everything, killing the NBD
connection used for the guest disk, we needlessly break I/O.

eg doing this in the chardev backend is not desirable, because the bugs
with hanging QEMU are typically caused by the way the frontend device
uses the chardev blocking I/O calls, instead of non-blocking I/O calls.

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2/5] io/channel.c,io/channel-socket.c: Add yank feature
  2020-05-11 11:14 ` [PATCH 2/5] io/channel.c,io/channel-socket.c: Add " Lukas Straub
@ 2020-05-11 11:51   ` Daniel P. Berrangé
  2020-05-11 20:00     ` Lukas Straub
  0 siblings, 1 reply; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-11 11:51 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Paolo Bonzini, Marc-André Lureau, Dr. David Alan Gilbert

On Mon, May 11, 2020 at 01:14:41PM +0200, Lukas Straub wrote:
> Add qio_channel_set_yank function to channel and to channel-socket,
> which will register a yank function. The yank function calls
> shutdown() on the socket.
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
>  Makefile.objs               |  1 +
>  include/io/channel-socket.h |  1 +
>  include/io/channel.h        | 12 ++++++++++++
>  io/channel-socket.c         | 29 +++++++++++++++++++++++++++++
>  io/channel.c                |  9 +++++++++
>  5 files changed, 52 insertions(+)

Assuming we want the yank feature (which I'm not entirely convinced
of), then I don't think any of this addition should exist. The
QIOChannel class already provides a "qio_channel_shutdown" method
which can be invoked. The layer above which is using the QIOChannel
should be calling this existing qio_channel_shutdown method in
response to any yank request.  The I/O layer shouldn't have any
direct dependancy on the yank feature.


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-11 11:49 ` [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Daniel P. Berrangé
@ 2020-05-11 12:07   ` Dr. David Alan Gilbert
  2020-05-11 12:17     ` Daniel P. Berrangé
  2020-05-11 18:12   ` Lukas Straub
  1 sibling, 1 reply; 52+ messages in thread
From: Dr. David Alan Gilbert @ 2020-05-11 12:07 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, Lukas Straub, qemu-block, Juan Quintela, qemu-devel,
	Max Reitz, Paolo Bonzini, Marc-André Lureau

* Daniel P. BerrangÃ© (berrange@redhat.com) wrote:
> On Mon, May 11, 2020 at 01:14:34PM +0200, Lukas Straub wrote:
> > Hello Everyone,
> > In many cases, if qemu has a network connection (qmp, migration, chardev, etc.)
> > to some other server and that server dies or hangs, qemu hangs too.
> 
> If qemu as a whole hangs due to a stalled network connection, that is a
> bug in QEMU that we should be fixing IMHO. QEMU should be doing non-blocking
> I/O in general, such that if the network connection or remote server stalls,
> we simply stop sending I/O - we shouldn't ever hang the QEMU process or main
> loop.
> 
> There are places in QEMU code which are not well behaved in this respect,
> but many are, and others are getting fixed where found to be important.
> 
> Arguably any place in QEMU code which can result in a hang of QEMU in the
> event of a stalled network should be considered a security flaw, because
> the network is untrusted in general.

That's not really true of the 'management network' - people trust that
and I don't see a lot of the qemu code getting fixed safely for all of
them.

> > These patches introduce the new 'yank' out-of-band qmp command to recover from
> > these kinds of hangs. The different subsystems register callbacks which get
> > executed with the yank command. For example the callback can shutdown() a
> > socket. This is intended for the colo use-case, but it can be used for other
> > things too of course.
> 
> IIUC, invoking the "yank" command unconditionally kills every single
> network connection in QEMU that has registered with the "yank" subsystem.
> IMHO this is way too big of a hammer, even if we accept there are bugs in
> QEMU not handling stalled networking well.

But isn't this hammer conditional - I see that it's a migration
capabiltiy for the migration socket, and a flag in nbd - so it only
yanks things you've told it to.

> eg if a chardev hangs QEMU, and we tear down everything, killing the NBD
> connection used for the guest disk, we needlessly break I/O.
> 
> eg doing this in the chardev backend is not desirable, because the bugs
> with hanging QEMU are typically caused by the way the frontend device
> uses the chardev blocking I/O calls, instead of non-blocking I/O calls.
> 

Having a way to get out of any of these problems from a single point is
quite nice.  To be useful in COLO you need to know for sure you can get
out of any network screwup.

We already use shutdown(2) in migrate_cancel and migrate-pause for
basically the same reason; I don't think we've got anything similar for
NBD, and we probably should have (I think I asked for it fairly
recently).

Dave



> Regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-11 12:07   ` Dr. David Alan Gilbert
@ 2020-05-11 12:17     ` Daniel P. Berrangé
  2020-05-11 15:46       ` Dr. David Alan Gilbert
  2020-05-11 19:41       ` Lukas Straub
  0 siblings, 2 replies; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-11 12:17 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Kevin Wolf, Lukas Straub, qemu-block, Juan Quintela, qemu-devel,
	Max Reitz, Paolo Bonzini, Marc-André Lureau

On Mon, May 11, 2020 at 01:07:18PM +0100, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrangé (berrange@redhat.com) wrote:
> > On Mon, May 11, 2020 at 01:14:34PM +0200, Lukas Straub wrote:
> > > Hello Everyone,
> > > In many cases, if qemu has a network connection (qmp, migration, chardev, etc.)
> > > to some other server and that server dies or hangs, qemu hangs too.
> > 
> > If qemu as a whole hangs due to a stalled network connection, that is a
> > bug in QEMU that we should be fixing IMHO. QEMU should be doing non-blocking
> > I/O in general, such that if the network connection or remote server stalls,
> > we simply stop sending I/O - we shouldn't ever hang the QEMU process or main
> > loop.
> > 
> > There are places in QEMU code which are not well behaved in this respect,
> > but many are, and others are getting fixed where found to be important.
> > 
> > Arguably any place in QEMU code which can result in a hang of QEMU in the
> > event of a stalled network should be considered a security flaw, because
> > the network is untrusted in general.
> 
> That's not really true of the 'management network' - people trust that
> and I don't see a lot of the qemu code getting fixed safely for all of
> them.

It depends on the user / app / deployment scenario. In OpenStack alot of
work was done to beef up security between services on the mgmt network,
with TLS encryption as standard to reduce attack vectors.

> > > These patches introduce the new 'yank' out-of-band qmp command to recover from
> > > these kinds of hangs. The different subsystems register callbacks which get
> > > executed with the yank command. For example the callback can shutdown() a
> > > socket. This is intended for the colo use-case, but it can be used for other
> > > things too of course.
> > 
> > IIUC, invoking the "yank" command unconditionally kills every single
> > network connection in QEMU that has registered with the "yank" subsystem.
> > IMHO this is way too big of a hammer, even if we accept there are bugs in
> > QEMU not handling stalled networking well.
> 
> But isn't this hammer conditional - I see that it's a migration
> capabiltiy for the migration socket, and a flag in nbd - so it only
> yanks things you've told it to.

IIUC, you have to set these flags upfront when you launch QEMU, or
hotplug the device using the feature. When something gets stuck,
and you issue the "yank" command, then everything that has the flag
enabled gets torn down. So in practice it looks like the flag will
get enabled for everything at QEMU startup, and yanking down tear
down everything.

> > eg if a chardev hangs QEMU, and we tear down everything, killing the NBD
> > connection used for the guest disk, we needlessly break I/O.
> > 
> > eg doing this in the chardev backend is not desirable, because the bugs
> > with hanging QEMU are typically caused by the way the frontend device
> > uses the chardev blocking I/O calls, instead of non-blocking I/O calls.
> > 
> 
> Having a way to get out of any of these problems from a single point is
> quite nice.  To be useful in COLO you need to know for sure you can get
> out of any network screwup.
> 
> We already use shutdown(2) in migrate_cancel and migrate-pause for
> basically the same reason; I don't think we've got anything similar for
> NBD, and we probably should have (I think I asked for it fairly
> recently).

Yes, the migrate_cancel is an example of a more fine grained way to
recover. I was thinking that we need an equivalent fine control knob
for NBD too.

That way if QEMU does get stuck, you can start by tearing down the
least distruptive channel. eg try tearing down the migration connection
first (which shouldn't negatively impact the guest), and only if that
doesn't work then, move on to tear down the NBD connection (which risks
data loss)

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-11 12:17     ` Daniel P. Berrangé
@ 2020-05-11 15:46       ` Dr. David Alan Gilbert
  2020-05-12  9:32         ` Lukas Straub
  2020-05-11 19:41       ` Lukas Straub
  1 sibling, 1 reply; 52+ messages in thread
From: Dr. David Alan Gilbert @ 2020-05-11 15:46 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, Lukas Straub, qemu-block, Juan Quintela, qemu-devel,
	Max Reitz, Paolo Bonzini, Marc-André Lureau

* Daniel P. BerrangÃ© (berrange@redhat.com) wrote:
> On Mon, May 11, 2020 at 01:07:18PM +0100, Dr. David Alan Gilbert wrote:
> > * Daniel P. BerrangÃ© (berrange@redhat.com) wrote:
> > > On Mon, May 11, 2020 at 01:14:34PM +0200, Lukas Straub wrote:
> > > > Hello Everyone,
> > > > In many cases, if qemu has a network connection (qmp, migration, chardev, etc.)
> > > > to some other server and that server dies or hangs, qemu hangs too.
> > > 
> > > If qemu as a whole hangs due to a stalled network connection, that is a
> > > bug in QEMU that we should be fixing IMHO. QEMU should be doing non-blocking
> > > I/O in general, such that if the network connection or remote server stalls,
> > > we simply stop sending I/O - we shouldn't ever hang the QEMU process or main
> > > loop.
> > > 
> > > There are places in QEMU code which are not well behaved in this respect,
> > > but many are, and others are getting fixed where found to be important.
> > > 
> > > Arguably any place in QEMU code which can result in a hang of QEMU in the
> > > event of a stalled network should be considered a security flaw, because
> > > the network is untrusted in general.
> > 
> > That's not really true of the 'management network' - people trust that
> > and I don't see a lot of the qemu code getting fixed safely for all of
> > them.
> 
> It depends on the user / app / deployment scenario. In OpenStack alot of
> work was done to beef up security between services on the mgmt network,
> with TLS encryption as standard to reduce attack vectors.
> 
> > > > These patches introduce the new 'yank' out-of-band qmp command to recover from
> > > > these kinds of hangs. The different subsystems register callbacks which get
> > > > executed with the yank command. For example the callback can shutdown() a
> > > > socket. This is intended for the colo use-case, but it can be used for other
> > > > things too of course.
> > > 
> > > IIUC, invoking the "yank" command unconditionally kills every single
> > > network connection in QEMU that has registered with the "yank" subsystem.
> > > IMHO this is way too big of a hammer, even if we accept there are bugs in
> > > QEMU not handling stalled networking well.
> > 
> > But isn't this hammer conditional - I see that it's a migration
> > capabiltiy for the migration socket, and a flag in nbd - so it only
> > yanks things you've told it to.
> 
> IIUC, you have to set these flags upfront when you launch QEMU, or
> hotplug the device using the feature. When something gets stuck,
> and you issue the "yank" command, then everything that has the flag
> enabled gets torn down. So in practice it looks like the flag will
> get enabled for everything at QEMU startup, and yanking down tear
> down everything.

For COLO I really expect it for the migration stream, the disk mirroring
stream and probably the network comparison/forwarding streams.

> > > eg if a chardev hangs QEMU, and we tear down everything, killing the NBD
> > > connection used for the guest disk, we needlessly break I/O.
> > > 
> > > eg doing this in the chardev backend is not desirable, because the bugs
> > > with hanging QEMU are typically caused by the way the frontend device
> > > uses the chardev blocking I/O calls, instead of non-blocking I/O calls.
> > > 
> > 
> > Having a way to get out of any of these problems from a single point is
> > quite nice.  To be useful in COLO you need to know for sure you can get
> > out of any network screwup.
> > 
> > We already use shutdown(2) in migrate_cancel and migrate-pause for
> > basically the same reason; I don't think we've got anything similar for
> > NBD, and we probably should have (I think I asked for it fairly
> > recently).
> 
> Yes, the migrate_cancel is an example of a more fine grained way to
> recover. I was thinking that we need an equivalent fine control knob
> for NBD too.

I feel it might be nice not to have to create so many separate knobs.

> That way if QEMU does get stuck, you can start by tearing down the
> least distruptive channel. eg try tearing down the migration connection
> first (which shouldn't negatively impact the guest), and only if that
> doesn't work then, move on to tear down the NBD connection (which risks
> data loss)

I wonder if a different way would be to make all network connections
register with yank, but then make yank take a list of connections to
shutdown(2).

Dave

> Regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 3/5] block/nbd.c: Add yank feature
  2020-05-11 11:14 ` [PATCH 3/5] block/nbd.c: " Lukas Straub
@ 2020-05-11 16:19   ` Dr. David Alan Gilbert
  2020-05-11 17:05     ` Lukas Straub
  0 siblings, 1 reply; 52+ messages in thread
From: Dr. David Alan Gilbert @ 2020-05-11 16:19 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz, Paolo Bonzini,
	Marc-André Lureau

* Lukas Straub (lukasstraub2@web.de) wrote:
> Add yank option, pass it to the socket-channel and register a yank
> function which sets s->state = NBD_CLIENT_QUIT. This is the same
> behaviour as if an error occured.
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>

> +static void nbd_yank(void *opaque)
> +{
> +    BlockDriverState *bs = opaque;
> +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> +
> +    atomic_set(&s->state, NBD_CLIENT_QUIT);

I think I was expecting a shutdown on the socket here - why doesn't it
have one?

Dave

> +}
> +
>  static void nbd_client_close(BlockDriverState *bs)
>  {
>      BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> @@ -1407,14 +1421,17 @@ static void nbd_client_close(BlockDriverState *bs)
>      nbd_teardown_connection(bs);
>  }
>  
> -static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
> +static QIOChannelSocket *nbd_establish_connection(BlockDriverState *bs,
> +                                                  SocketAddress *saddr,
>                                                    Error **errp)
>  {
> +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
>      QIOChannelSocket *sioc;
>      Error *local_err = NULL;
>  
>      sioc = qio_channel_socket_new();
>      qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");
> +    qio_channel_set_yank(QIO_CHANNEL(sioc), s->yank);
>  
>      qio_channel_socket_connect_sync(sioc, saddr, &local_err);
>      if (local_err) {
> @@ -1438,7 +1455,7 @@ static int nbd_client_connect(BlockDriverState *bs, Error **errp)
>       * establish TCP connection, return error if it fails
>       * TODO: Configurable retry-until-timeout behaviour.
>       */
> -    QIOChannelSocket *sioc = nbd_establish_connection(s->saddr, errp);
> +    QIOChannelSocket *sioc = nbd_establish_connection(bs, s->saddr, errp);
>  
>      if (!sioc) {
>          return -ECONNREFUSED;
> @@ -1829,6 +1846,12 @@ static QemuOptsList nbd_runtime_opts = {
>                      "future requests before a successful reconnect will "
>                      "immediately fail. Default 0",
>          },
> +        {
> +            .name = "yank",
> +            .type = QEMU_OPT_BOOL,
> +            .help = "Forcibly close the connection and don't attempt to "
> +                    "reconnect when the 'yank' qmp command is executed.",
> +        },
>          { /* end of list */ }
>      },
>  };
> @@ -1888,6 +1911,8 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options,
>  
>      s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0);
>  
> +    s->yank = qemu_opt_get_bool(opts, "yank", false);
> +
>      ret = 0;
>  
>   error:
> @@ -1921,6 +1946,10 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
>      /* successfully connected */
>      s->state = NBD_CLIENT_CONNECTED;
>  
> +    if (s->yank) {
> +        yank_register_function(nbd_yank, bs);
> +    }
> +
>      s->connection_co = qemu_coroutine_create(nbd_connection_entry, s);
>      bdrv_inc_in_flight(bs);
>      aio_co_schedule(bdrv_get_aio_context(bs), s->connection_co);
> @@ -1972,6 +2001,11 @@ static void nbd_close(BlockDriverState *bs)
>      BDRVNBDState *s = bs->opaque;
>  
>      nbd_client_close(bs);
> +
> +    if (s->yank) {
> +        yank_unregister_function(nbd_yank, bs);
> +    }
> +
>      nbd_clear_bdrvstate(s);
>  }
>  
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 943df1926a..1c1578160e 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -3862,6 +3862,8 @@
>  #                   reconnect. After that time, any delayed requests and all
>  #                   future requests before a successful reconnect will
>  #                   immediately fail. Default 0 (Since 4.2)
> +# @yank: Forcibly close the connection and don't attempt to reconnect when
> +#        the 'yank' qmp command is executed. (Since: 5.1)
>  #
>  # Since: 2.9
>  ##
> @@ -3870,7 +3872,8 @@
>              '*export': 'str',
>              '*tls-creds': 'str',
>              '*x-dirty-bitmap': 'str',
> -            '*reconnect-delay': 'uint32' } }
> +            '*reconnect-delay': 'uint32',
> +	    'yank': 'bool' } }
>  
>  ##
>  # @BlockdevOptionsRaw:
> -- 
> 2.20.1
> 


--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 3/5] block/nbd.c: Add yank feature
  2020-05-11 16:19   ` Dr. David Alan Gilbert
@ 2020-05-11 17:05     ` Lukas Straub
  2020-05-11 17:24       ` Dr. David Alan Gilbert
  2020-05-12  8:54       ` Daniel P. Berrangé
  0 siblings, 2 replies; 52+ messages in thread
From: Lukas Straub @ 2020-05-11 17:05 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz, Paolo Bonzini,
	Marc-André Lureau

[-- Attachment #1: Type: text/plain, Size: 4966 bytes --]

On Mon, 11 May 2020 17:19:09 +0100
"Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:

> * Lukas Straub (lukasstraub2@web.de) wrote:
> > Add yank option, pass it to the socket-channel and register a yank
> > function which sets s->state = NBD_CLIENT_QUIT. This is the same
> > behaviour as if an error occured.
> > 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>  
> 
> > +static void nbd_yank(void *opaque)
> > +{
> > +    BlockDriverState *bs = opaque;
> > +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > +
> > +    atomic_set(&s->state, NBD_CLIENT_QUIT);  
> 
> I think I was expecting a shutdown on the socket here - why doesn't it
> have one?

For nbd, we register two yank functions: This one and we enable the yank feature on the qio channel (see function nbd_establish_connection below).

Regards,
Lukas Straub

> Dave
> 
> > +}
> > +
> >  static void nbd_client_close(BlockDriverState *bs)
> >  {
> >      BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > @@ -1407,14 +1421,17 @@ static void nbd_client_close(BlockDriverState *bs)
> >      nbd_teardown_connection(bs);
> >  }
> >  
> > -static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
> > +static QIOChannelSocket *nbd_establish_connection(BlockDriverState *bs,
> > +                                                  SocketAddress *saddr,
> >                                                    Error **errp)
> >  {
> > +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> >      QIOChannelSocket *sioc;
> >      Error *local_err = NULL;
> >  
> >      sioc = qio_channel_socket_new();
> >      qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");
> > +    qio_channel_set_yank(QIO_CHANNEL(sioc), s->yank);
> >  
> >      qio_channel_socket_connect_sync(sioc, saddr, &local_err);
> >      if (local_err) {
> > @@ -1438,7 +1455,7 @@ static int nbd_client_connect(BlockDriverState *bs, Error **errp)
> >       * establish TCP connection, return error if it fails
> >       * TODO: Configurable retry-until-timeout behaviour.
> >       */
> > -    QIOChannelSocket *sioc = nbd_establish_connection(s->saddr, errp);
> > +    QIOChannelSocket *sioc = nbd_establish_connection(bs, s->saddr, errp);
> >  
> >      if (!sioc) {
> >          return -ECONNREFUSED;
> > @@ -1829,6 +1846,12 @@ static QemuOptsList nbd_runtime_opts = {
> >                      "future requests before a successful reconnect will "
> >                      "immediately fail. Default 0",
> >          },
> > +        {
> > +            .name = "yank",
> > +            .type = QEMU_OPT_BOOL,
> > +            .help = "Forcibly close the connection and don't attempt to "
> > +                    "reconnect when the 'yank' qmp command is executed.",
> > +        },
> >          { /* end of list */ }
> >      },
> >  };
> > @@ -1888,6 +1911,8 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options,
> >  
> >      s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0);
> >  
> > +    s->yank = qemu_opt_get_bool(opts, "yank", false);
> > +
> >      ret = 0;
> >  
> >   error:
> > @@ -1921,6 +1946,10 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
> >      /* successfully connected */
> >      s->state = NBD_CLIENT_CONNECTED;
> >  
> > +    if (s->yank) {
> > +        yank_register_function(nbd_yank, bs);
> > +    }
> > +
> >      s->connection_co = qemu_coroutine_create(nbd_connection_entry, s);
> >      bdrv_inc_in_flight(bs);
> >      aio_co_schedule(bdrv_get_aio_context(bs), s->connection_co);
> > @@ -1972,6 +2001,11 @@ static void nbd_close(BlockDriverState *bs)
> >      BDRVNBDState *s = bs->opaque;
> >  
> >      nbd_client_close(bs);
> > +
> > +    if (s->yank) {
> > +        yank_unregister_function(nbd_yank, bs);
> > +    }
> > +
> >      nbd_clear_bdrvstate(s);
> >  }
> >  
> > diff --git a/qapi/block-core.json b/qapi/block-core.json
> > index 943df1926a..1c1578160e 100644
> > --- a/qapi/block-core.json
> > +++ b/qapi/block-core.json
> > @@ -3862,6 +3862,8 @@
> >  #                   reconnect. After that time, any delayed requests and all
> >  #                   future requests before a successful reconnect will
> >  #                   immediately fail. Default 0 (Since 4.2)
> > +# @yank: Forcibly close the connection and don't attempt to reconnect when
> > +#        the 'yank' qmp command is executed. (Since: 5.1)
> >  #
> >  # Since: 2.9
> >  ##
> > @@ -3870,7 +3872,8 @@
> >              '*export': 'str',
> >              '*tls-creds': 'str',
> >              '*x-dirty-bitmap': 'str',
> > -            '*reconnect-delay': 'uint32' } }
> > +            '*reconnect-delay': 'uint32',
> > +	    'yank': 'bool' } }
> >  
> >  ##
> >  # @BlockdevOptionsRaw:
> > -- 
> > 2.20.1
> >   
> 
> 
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 3/5] block/nbd.c: Add yank feature
  2020-05-11 17:05     ` Lukas Straub
@ 2020-05-11 17:24       ` Dr. David Alan Gilbert
  2020-05-12  8:54       ` Daniel P. Berrangé
  1 sibling, 0 replies; 52+ messages in thread
From: Dr. David Alan Gilbert @ 2020-05-11 17:24 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz, Paolo Bonzini,
	Marc-André Lureau

* Lukas Straub (lukasstraub2@web.de) wrote:
> On Mon, 11 May 2020 17:19:09 +0100
> "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> 
> > * Lukas Straub (lukasstraub2@web.de) wrote:
> > > Add yank option, pass it to the socket-channel and register a yank
> > > function which sets s->state = NBD_CLIENT_QUIT. This is the same
> > > behaviour as if an error occured.
> > > 
> > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>  
> > 
> > > +static void nbd_yank(void *opaque)
> > > +{
> > > +    BlockDriverState *bs = opaque;
> > > +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > > +
> > > +    atomic_set(&s->state, NBD_CLIENT_QUIT);  
> > 
> > I think I was expecting a shutdown on the socket here - why doesn't it
> > have one?
> 
> For nbd, we register two yank functions: This one and we enable the yank feature on the qio channel (see function nbd_establish_connection below).

Oh I see; yeh that still surprises me a little; I'd expected one yank
per item.

Dave

> Regards,
> Lukas Straub
> 
> > Dave
> > 
> > > +}
> > > +
> > >  static void nbd_client_close(BlockDriverState *bs)
> > >  {
> > >      BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > > @@ -1407,14 +1421,17 @@ static void nbd_client_close(BlockDriverState *bs)
> > >      nbd_teardown_connection(bs);
> > >  }
> > >  
> > > -static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
> > > +static QIOChannelSocket *nbd_establish_connection(BlockDriverState *bs,
> > > +                                                  SocketAddress *saddr,
> > >                                                    Error **errp)
> > >  {
> > > +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > >      QIOChannelSocket *sioc;
> > >      Error *local_err = NULL;
> > >  
> > >      sioc = qio_channel_socket_new();
> > >      qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");
> > > +    qio_channel_set_yank(QIO_CHANNEL(sioc), s->yank);
> > >  
> > >      qio_channel_socket_connect_sync(sioc, saddr, &local_err);
> > >      if (local_err) {
> > > @@ -1438,7 +1455,7 @@ static int nbd_client_connect(BlockDriverState *bs, Error **errp)
> > >       * establish TCP connection, return error if it fails
> > >       * TODO: Configurable retry-until-timeout behaviour.
> > >       */
> > > -    QIOChannelSocket *sioc = nbd_establish_connection(s->saddr, errp);
> > > +    QIOChannelSocket *sioc = nbd_establish_connection(bs, s->saddr, errp);
> > >  
> > >      if (!sioc) {
> > >          return -ECONNREFUSED;
> > > @@ -1829,6 +1846,12 @@ static QemuOptsList nbd_runtime_opts = {
> > >                      "future requests before a successful reconnect will "
> > >                      "immediately fail. Default 0",
> > >          },
> > > +        {
> > > +            .name = "yank",
> > > +            .type = QEMU_OPT_BOOL,
> > > +            .help = "Forcibly close the connection and don't attempt to "
> > > +                    "reconnect when the 'yank' qmp command is executed.",
> > > +        },
> > >          { /* end of list */ }
> > >      },
> > >  };
> > > @@ -1888,6 +1911,8 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options,
> > >  
> > >      s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0);
> > >  
> > > +    s->yank = qemu_opt_get_bool(opts, "yank", false);
> > > +
> > >      ret = 0;
> > >  
> > >   error:
> > > @@ -1921,6 +1946,10 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
> > >      /* successfully connected */
> > >      s->state = NBD_CLIENT_CONNECTED;
> > >  
> > > +    if (s->yank) {
> > > +        yank_register_function(nbd_yank, bs);
> > > +    }
> > > +
> > >      s->connection_co = qemu_coroutine_create(nbd_connection_entry, s);
> > >      bdrv_inc_in_flight(bs);
> > >      aio_co_schedule(bdrv_get_aio_context(bs), s->connection_co);
> > > @@ -1972,6 +2001,11 @@ static void nbd_close(BlockDriverState *bs)
> > >      BDRVNBDState *s = bs->opaque;
> > >  
> > >      nbd_client_close(bs);
> > > +
> > > +    if (s->yank) {
> > > +        yank_unregister_function(nbd_yank, bs);
> > > +    }
> > > +
> > >      nbd_clear_bdrvstate(s);
> > >  }
> > >  
> > > diff --git a/qapi/block-core.json b/qapi/block-core.json
> > > index 943df1926a..1c1578160e 100644
> > > --- a/qapi/block-core.json
> > > +++ b/qapi/block-core.json
> > > @@ -3862,6 +3862,8 @@
> > >  #                   reconnect. After that time, any delayed requests and all
> > >  #                   future requests before a successful reconnect will
> > >  #                   immediately fail. Default 0 (Since 4.2)
> > > +# @yank: Forcibly close the connection and don't attempt to reconnect when
> > > +#        the 'yank' qmp command is executed. (Since: 5.1)
> > >  #
> > >  # Since: 2.9
> > >  ##
> > > @@ -3870,7 +3872,8 @@
> > >              '*export': 'str',
> > >              '*tls-creds': 'str',
> > >              '*x-dirty-bitmap': 'str',
> > > -            '*reconnect-delay': 'uint32' } }
> > > +            '*reconnect-delay': 'uint32',
> > > +	    'yank': 'bool' } }
> > >  
> > >  ##
> > >  # @BlockdevOptionsRaw:
> > > -- 
> > > 2.20.1
> > >   
> > 
> > 
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > 
> 


--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-11 11:49 ` [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Daniel P. Berrangé
  2020-05-11 12:07   ` Dr. David Alan Gilbert
@ 2020-05-11 18:12   ` Lukas Straub
  2020-05-12  9:03     ` Daniel P. Berrangé
  2020-05-12  9:09     ` Dr. David Alan Gilbert
  1 sibling, 2 replies; 52+ messages in thread
From: Lukas Straub @ 2020-05-11 18:12 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Paolo Bonzini, Marc-André Lureau, Dr. David Alan Gilbert

[-- Attachment #1: Type: text/plain, Size: 2574 bytes --]

On Mon, 11 May 2020 12:49:47 +0100
Daniel P. Berrangé <berrange@redhat.com> wrote:

> On Mon, May 11, 2020 at 01:14:34PM +0200, Lukas Straub wrote:
> > Hello Everyone,
> > In many cases, if qemu has a network connection (qmp, migration, chardev, etc.)
> > to some other server and that server dies or hangs, qemu hangs too.  
> 
> If qemu as a whole hangs due to a stalled network connection, that is a
> bug in QEMU that we should be fixing IMHO. QEMU should be doing non-blocking
> I/O in general, such that if the network connection or remote server stalls,
> we simply stop sending I/O - we shouldn't ever hang the QEMU process or main
> loop.
> 
> There are places in QEMU code which are not well behaved in this respect,
> but many are, and others are getting fixed where found to be important.
> 
> Arguably any place in QEMU code which can result in a hang of QEMU in the
> event of a stalled network should be considered a security flaw, because
> the network is untrusted in general.

The fact that out-of-band qmp commands exist at all shows that we have to make tradeoffs of developer time vs. doing things right. Sure, the migration code can be rewritten to use non-blocking i/o and finegrained locks. But as a hobbyist I don't have time to fix this.

> > These patches introduce the new 'yank' out-of-band qmp command to recover from
> > these kinds of hangs. The different subsystems register callbacks which get
> > executed with the yank command. For example the callback can shutdown() a
> > socket. This is intended for the colo use-case, but it can be used for other
> > things too of course.  
> 
> IIUC, invoking the "yank" command unconditionally kills every single
> network connection in QEMU that has registered with the "yank" subsystem.
> IMHO this is way too big of a hammer, even if we accept there are bugs in
> QEMU not handling stalled networking well.
> 
> eg if a chardev hangs QEMU, and we tear down everything, killing the NBD
> connection used for the guest disk, we needlessly break I/O.

Yeah, these patches are intended to solve the problems with the colo use-case where all external connections (migration, chardevs, nbd) are just for replication. In other use-cases you'd enable the yank feature only on the non-essential connections.

> eg doing this in the chardev backend is not desirable, because the bugs
> with hanging QEMU are typically caused by the way the frontend device
> uses the chardev blocking I/O calls, instead of non-blocking I/O calls.
> 
> 
> Regards,
> Daniel


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-11 12:17     ` Daniel P. Berrangé
  2020-05-11 15:46       ` Dr. David Alan Gilbert
@ 2020-05-11 19:41       ` Lukas Straub
  1 sibling, 0 replies; 52+ messages in thread
From: Lukas Straub @ 2020-05-11 19:41 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel,
	Dr. David Alan Gilbert, Max Reitz, Paolo Bonzini,
	Marc-André Lureau

[-- Attachment #1: Type: text/plain, Size: 4438 bytes --]

On Mon, 11 May 2020 13:17:14 +0100
Daniel P. Berrangé <berrange@redhat.com> wrote:

> On Mon, May 11, 2020 at 01:07:18PM +0100, Dr. David Alan Gilbert wrote:
> > * Daniel P. Berrangé (berrange@redhat.com) wrote:  
> > > On Mon, May 11, 2020 at 01:14:34PM +0200, Lukas Straub wrote:  
> > > > Hello Everyone,
> > > > In many cases, if qemu has a network connection (qmp, migration, chardev, etc.)
> > > > to some other server and that server dies or hangs, qemu hangs too.  
> > > 
> > > If qemu as a whole hangs due to a stalled network connection, that is a
> > > bug in QEMU that we should be fixing IMHO. QEMU should be doing non-blocking
> > > I/O in general, such that if the network connection or remote server stalls,
> > > we simply stop sending I/O - we shouldn't ever hang the QEMU process or main
> > > loop.
> > > 
> > > There are places in QEMU code which are not well behaved in this respect,
> > > but many are, and others are getting fixed where found to be important.
> > > 
> > > Arguably any place in QEMU code which can result in a hang of QEMU in the
> > > event of a stalled network should be considered a security flaw, because
> > > the network is untrusted in general.  
> > 
> > That's not really true of the 'management network' - people trust that
> > and I don't see a lot of the qemu code getting fixed safely for all of
> > them.  
> 
> It depends on the user / app / deployment scenario. In OpenStack alot of
> work was done to beef up security between services on the mgmt network,
> with TLS encryption as standard to reduce attack vectors.
> 
> > > > These patches introduce the new 'yank' out-of-band qmp command to recover from
> > > > these kinds of hangs. The different subsystems register callbacks which get
> > > > executed with the yank command. For example the callback can shutdown() a
> > > > socket. This is intended for the colo use-case, but it can be used for other
> > > > things too of course.  
> > > 
> > > IIUC, invoking the "yank" command unconditionally kills every single
> > > network connection in QEMU that has registered with the "yank" subsystem.
> > > IMHO this is way too big of a hammer, even if we accept there are bugs in
> > > QEMU not handling stalled networking well.  
> > 
> > But isn't this hammer conditional - I see that it's a migration
> > capabiltiy for the migration socket, and a flag in nbd - so it only
> > yanks things you've told it to.  
> 
> IIUC, you have to set these flags upfront when you launch QEMU, or
> hotplug the device using the feature. When something gets stuck,
> and you issue the "yank" command, then everything that has the flag
> enabled gets torn down. So in practice it looks like the flag will
> get enabled for everything at QEMU startup, and yanking down tear
> down everything.
> 
> > > eg if a chardev hangs QEMU, and we tear down everything, killing the NBD
> > > connection used for the guest disk, we needlessly break I/O.
> > > 
> > > eg doing this in the chardev backend is not desirable, because the bugs
> > > with hanging QEMU are typically caused by the way the frontend device
> > > uses the chardev blocking I/O calls, instead of non-blocking I/O calls.
> > >   
> > 
> > Having a way to get out of any of these problems from a single point is
> > quite nice.  To be useful in COLO you need to know for sure you can get
> > out of any network screwup.
> > 
> > We already use shutdown(2) in migrate_cancel and migrate-pause for
> > basically the same reason; I don't think we've got anything similar for
> > NBD, and we probably should have (I think I asked for it fairly
> > recently).  
> 
> Yes, the migrate_cancel is an example of a more fine grained way to
> recover. I was thinking that we need an equivalent fine control knob
> for NBD too.

One reason why the yank feature is done this way is that the management application may not know in what state qemu is and so it doesn't know what to yank. Poking in the dark would work too in my case, but it's not that nice.

Regards,
Lukas Straub

> That way if QEMU does get stuck, you can start by tearing down the
> least distruptive channel. eg try tearing down the migration connection
> first (which shouldn't negatively impact the guest), and only if that
> doesn't work then, move on to tear down the NBD connection (which risks
> data loss)
> 
> Regards,
> Daniel


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2/5] io/channel.c,io/channel-socket.c: Add yank feature
  2020-05-11 11:51   ` Daniel P. Berrangé
@ 2020-05-11 20:00     ` Lukas Straub
  2020-05-12  8:52       ` Daniel P. Berrangé
  0 siblings, 1 reply; 52+ messages in thread
From: Lukas Straub @ 2020-05-11 20:00 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Paolo Bonzini, Marc-André Lureau, Dr. David Alan Gilbert

[-- Attachment #1: Type: text/plain, Size: 1263 bytes --]

On Mon, 11 May 2020 12:51:46 +0100
Daniel P. Berrangé <berrange@redhat.com> wrote:

> On Mon, May 11, 2020 at 01:14:41PM +0200, Lukas Straub wrote:
> > Add qio_channel_set_yank function to channel and to channel-socket,
> > which will register a yank function. The yank function calls
> > shutdown() on the socket.
> > 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > ---
> >  Makefile.objs               |  1 +
> >  include/io/channel-socket.h |  1 +
> >  include/io/channel.h        | 12 ++++++++++++
> >  io/channel-socket.c         | 29 +++++++++++++++++++++++++++++
> >  io/channel.c                |  9 +++++++++
> >  5 files changed, 52 insertions(+)  
> 
> Assuming we want the yank feature (which I'm not entirely convinced
> of), then I don't think any of this addition should exist. The
> QIOChannel class already provides a "qio_channel_shutdown" method
> which can be invoked. The layer above which is using the QIOChannel
> should be calling this existing qio_channel_shutdown method in
> response to any yank request.  The I/O layer shouldn't have any
> direct dependancy on the yank feature.

Having the code here simplifys the code in the other places.

Regards,
Lukas Straub

> 
> Regards,
> Daniel


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2/5] io/channel.c,io/channel-socket.c: Add yank feature
  2020-05-11 20:00     ` Lukas Straub
@ 2020-05-12  8:52       ` Daniel P. Berrangé
  0 siblings, 0 replies; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-12  8:52 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Paolo Bonzini, Marc-André Lureau, Dr. David Alan Gilbert

On Mon, May 11, 2020 at 10:00:42PM +0200, Lukas Straub wrote:
> On Mon, 11 May 2020 12:51:46 +0100
> Daniel P. Berrangé <berrange@redhat.com> wrote:
> 
> > On Mon, May 11, 2020 at 01:14:41PM +0200, Lukas Straub wrote:
> > > Add qio_channel_set_yank function to channel and to channel-socket,
> > > which will register a yank function. The yank function calls
> > > shutdown() on the socket.
> > > 
> > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > ---
> > >  Makefile.objs               |  1 +
> > >  include/io/channel-socket.h |  1 +
> > >  include/io/channel.h        | 12 ++++++++++++
> > >  io/channel-socket.c         | 29 +++++++++++++++++++++++++++++
> > >  io/channel.c                |  9 +++++++++
> > >  5 files changed, 52 insertions(+)  
> > 
> > Assuming we want the yank feature (which I'm not entirely convinced
> > of), then I don't think any of this addition should exist. The
> > QIOChannel class already provides a "qio_channel_shutdown" method
> > which can be invoked. The layer above which is using the QIOChannel
> > should be calling this existing qio_channel_shutdown method in
> > response to any yank request.  The I/O layer shouldn't have any
> > direct dependancy on the yank feature.
> 
> Having the code here simplifys the code in the other places.

This introduces a depedancy from the IO channel code into the system
emulator infra which is not desired. The goal is to keep the io/ module
isolated and as self-contained as possible.

I don't think it makes a significant difference to the yank code to
keep it out of the io layer, and simply call the QIOChannel objects
via their existing public API.

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 3/5] block/nbd.c: Add yank feature
  2020-05-11 17:05     ` Lukas Straub
  2020-05-11 17:24       ` Dr. David Alan Gilbert
@ 2020-05-12  8:54       ` Daniel P. Berrangé
  2020-05-15  9:48         ` Lukas Straub
  1 sibling, 1 reply; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-12  8:54 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini, Dr. David Alan Gilbert

On Mon, May 11, 2020 at 07:05:24PM +0200, Lukas Straub wrote:
> On Mon, 11 May 2020 17:19:09 +0100
> "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> 
> > * Lukas Straub (lukasstraub2@web.de) wrote:
> > > Add yank option, pass it to the socket-channel and register a yank
> > > function which sets s->state = NBD_CLIENT_QUIT. This is the same
> > > behaviour as if an error occured.
> > > 
> > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>  
> > 
> > > +static void nbd_yank(void *opaque)
> > > +{
> > > +    BlockDriverState *bs = opaque;
> > > +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > > +
> > > +    atomic_set(&s->state, NBD_CLIENT_QUIT);  
> > 
> > I think I was expecting a shutdown on the socket here - why doesn't it
> > have one?
> 
> For nbd, we register two yank functions: This one and we enable
> the yank feature on the qio channel (see function
> nbd_establish_connection below).

As mentioned on the earlier patch, I don't want to see any yank
code in the QIOChannel object directly. This nbd_yank function
can simply call the qio_channel_shutdown() function directly
and avoid need for modifying the QIOChannel object with yank
support.

This will make the NBD code clearer too, as we can see exactly
what actions are performed at NBD layer when a yank happens,
which is what David was not seeing clearly here.

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 4/5] chardev/char-socket.c: Add yank feature
  2020-05-11 11:14 ` [PATCH 4/5] chardev/char-socket.c: " Lukas Straub
@ 2020-05-12  8:56   ` Daniel P. Berrangé
  0 siblings, 0 replies; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-12  8:56 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Paolo Bonzini, Marc-André Lureau, Dr. David Alan Gilbert

On Mon, May 11, 2020 at 01:14:47PM +0200, Lukas Straub wrote:
> Add yank option which is passed to the socket-channel.
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
>  chardev/char-socket.c | 8 ++++++++
>  chardev/char.c        | 3 +++
>  qapi/char.json        | 5 ++++-
>  3 files changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> index 185fe38dda..e476358941 100644
> --- a/chardev/char-socket.c
> +++ b/chardev/char-socket.c
> @@ -65,6 +65,7 @@ typedef struct {
>      int max_size;
>      int do_telnetopt;
>      int do_nodelay;
> +    int do_yank;
>      int *read_msgfds;
>      size_t read_msgfds_num;
>      int *write_msgfds;
> @@ -877,6 +878,9 @@ static int tcp_chr_new_client(Chardev *chr, QIOChannelSocket *sioc)
>      if (s->do_nodelay) {
>          qio_channel_set_delay(s->ioc, false);
>      }
> +    if (s->do_yank) {
> +        qio_channel_set_yank(s->ioc, true);
> +    }

This should call yank_register_function() to  register a local
callback, which then invokes qio_channel_shutdown() upon acting,
avoiding the need to add this qio_channel_set_yank method.

Likewise for the migration code in the next patch.

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-11 18:12   ` Lukas Straub
@ 2020-05-12  9:03     ` Daniel P. Berrangé
  2020-05-12  9:06       ` Dr. David Alan Gilbert
  2020-05-12  9:09     ` Dr. David Alan Gilbert
  1 sibling, 1 reply; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-12  9:03 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Paolo Bonzini, Marc-André Lureau, Dr. David Alan Gilbert

On Mon, May 11, 2020 at 08:12:18PM +0200, Lukas Straub wrote:
> On Mon, 11 May 2020 12:49:47 +0100
> Daniel P. Berrangé <berrange@redhat.com> wrote:
> 
> > On Mon, May 11, 2020 at 01:14:34PM +0200, Lukas Straub wrote:
> > > Hello Everyone,
> > > In many cases, if qemu has a network connection (qmp, migration, chardev, etc.)
> > > to some other server and that server dies or hangs, qemu hangs too.  
> > 
> > If qemu as a whole hangs due to a stalled network connection, that is a
> > bug in QEMU that we should be fixing IMHO. QEMU should be doing non-blocking
> > I/O in general, such that if the network connection or remote server stalls,
> > we simply stop sending I/O - we shouldn't ever hang the QEMU process or main
> > loop.
> > 
> > There are places in QEMU code which are not well behaved in this respect,
> > but many are, and others are getting fixed where found to be important.
> > 
> > Arguably any place in QEMU code which can result in a hang of QEMU in the
> > event of a stalled network should be considered a security flaw, because
> > the network is untrusted in general.
> 
> The fact that out-of-band qmp commands exist at all shows that we have to make tradeoffs of developer time vs. doing things right. Sure, the migration code can be rewritten to use non-blocking i/o and finegrained locks. But as a hobbyist I don't have time to fix this.
> 
> > > These patches introduce the new 'yank' out-of-band qmp command to recover from
> > > these kinds of hangs. The different subsystems register callbacks which get
> > > executed with the yank command. For example the callback can shutdown() a
> > > socket. This is intended for the colo use-case, but it can be used for other
> > > things too of course.  
> > 
> > IIUC, invoking the "yank" command unconditionally kills every single
> > network connection in QEMU that has registered with the "yank" subsystem.
> > IMHO this is way too big of a hammer, even if we accept there are bugs in
> > QEMU not handling stalled networking well.
> > 
> > eg if a chardev hangs QEMU, and we tear down everything, killing the NBD
> > connection used for the guest disk, we needlessly break I/O.
> 
> Yeah, these patches are intended to solve the problems with the colo
> use-case where all external connections (migration, chardevs, nbd)
> are just for replication. In other use-cases you'd enable the yank
> feature only on the non-essential connections.

That is a pretty inflexible design for other use cases though,
as "non-essential" is not a black & white list in general. There
are varying levels of importance to the different channels. We
can afford to loose migration without any user visible effects.
If that doesn't solve it, a serial device chardev, or VNC connection
can be dropped at the inconvenience of loosing interactive console
which is end user visible impact, so may only be want to be yanked
if the migration yank didn't fix it. 

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-12  9:03     ` Daniel P. Berrangé
@ 2020-05-12  9:06       ` Dr. David Alan Gilbert
  0 siblings, 0 replies; 52+ messages in thread
From: Dr. David Alan Gilbert @ 2020-05-12  9:06 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, Lukas Straub, qemu-block, Juan Quintela, qemu-devel,
	Max Reitz, Paolo Bonzini, Marc-André Lureau

* Daniel P. BerrangÃ© (berrange@redhat.com) wrote:
> On Mon, May 11, 2020 at 08:12:18PM +0200, Lukas Straub wrote:
> > On Mon, 11 May 2020 12:49:47 +0100
> > Daniel P. BerrangÃ© <berrange@redhat.com> wrote:
> > 
> > > On Mon, May 11, 2020 at 01:14:34PM +0200, Lukas Straub wrote:
> > > > Hello Everyone,
> > > > In many cases, if qemu has a network connection (qmp, migration, chardev, etc.)
> > > > to some other server and that server dies or hangs, qemu hangs too.  
> > > 
> > > If qemu as a whole hangs due to a stalled network connection, that is a
> > > bug in QEMU that we should be fixing IMHO. QEMU should be doing non-blocking
> > > I/O in general, such that if the network connection or remote server stalls,
> > > we simply stop sending I/O - we shouldn't ever hang the QEMU process or main
> > > loop.
> > > 
> > > There are places in QEMU code which are not well behaved in this respect,
> > > but many are, and others are getting fixed where found to be important.
> > > 
> > > Arguably any place in QEMU code which can result in a hang of QEMU in the
> > > event of a stalled network should be considered a security flaw, because
> > > the network is untrusted in general.
> > 
> > The fact that out-of-band qmp commands exist at all shows that we have to make tradeoffs of developer time vs. doing things right. Sure, the migration code can be rewritten to use non-blocking i/o and finegrained locks. But as a hobbyist I don't have time to fix this.
> > 
> > > > These patches introduce the new 'yank' out-of-band qmp command to recover from
> > > > these kinds of hangs. The different subsystems register callbacks which get
> > > > executed with the yank command. For example the callback can shutdown() a
> > > > socket. This is intended for the colo use-case, but it can be used for other
> > > > things too of course.  
> > > 
> > > IIUC, invoking the "yank" command unconditionally kills every single
> > > network connection in QEMU that has registered with the "yank" subsystem.
> > > IMHO this is way too big of a hammer, even if we accept there are bugs in
> > > QEMU not handling stalled networking well.
> > > 
> > > eg if a chardev hangs QEMU, and we tear down everything, killing the NBD
> > > connection used for the guest disk, we needlessly break I/O.
> > 
> > Yeah, these patches are intended to solve the problems with the colo
> > use-case where all external connections (migration, chardevs, nbd)
> > are just for replication. In other use-cases you'd enable the yank
> > feature only on the non-essential connections.
> 
> That is a pretty inflexible design for other use cases though,
> as "non-essential" is not a black & white list in general. There
> are varying levels of importance to the different channels. We
> can afford to loose migration without any user visible effects.
> If that doesn't solve it, a serial device chardev, or VNC connection
> can be dropped at the inconvenience of loosing interactive console
> which is end user visible impact, so may only be want to be yanked
> if the migration yank didn't fix it. 

In the case of COLO that's not the case though - here we explicitly want
to kill the migration to be able to ensure that we can recover - and
we're under time pressure to get the other member of the pair running
again. 

Dave

> Regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-11 18:12   ` Lukas Straub
  2020-05-12  9:03     ` Daniel P. Berrangé
@ 2020-05-12  9:09     ` Dr. David Alan Gilbert
  1 sibling, 0 replies; 52+ messages in thread
From: Dr. David Alan Gilbert @ 2020-05-12  9:09 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz, Paolo Bonzini,
	Marc-André Lureau

* Lukas Straub (lukasstraub2@web.de) wrote:
> On Mon, 11 May 2020 12:49:47 +0100
> Daniel P. BerrangÃ© <berrange@redhat.com> wrote:
> 
> > On Mon, May 11, 2020 at 01:14:34PM +0200, Lukas Straub wrote:
> > > Hello Everyone,
> > > In many cases, if qemu has a network connection (qmp, migration, chardev, etc.)
> > > to some other server and that server dies or hangs, qemu hangs too.  
> > 
> > If qemu as a whole hangs due to a stalled network connection, that is a
> > bug in QEMU that we should be fixing IMHO. QEMU should be doing non-blocking
> > I/O in general, such that if the network connection or remote server stalls,
> > we simply stop sending I/O - we shouldn't ever hang the QEMU process or main
> > loop.
> > 
> > There are places in QEMU code which are not well behaved in this respect,
> > but many are, and others are getting fixed where found to be important.
> > 
> > Arguably any place in QEMU code which can result in a hang of QEMU in the
> > event of a stalled network should be considered a security flaw, because
> > the network is untrusted in general.
> 
> The fact that out-of-band qmp commands exist at all shows that we have to make tradeoffs of developer time vs. doing things right. Sure, the migration code can be rewritten to use non-blocking i/o and finegrained locks. But as a hobbyist I don't have time to fix this.

If it was just an hobbyist vs fulltime thing then I'd say that was a bad
way to make the decision; however the reality is that even those who are
paid to watch this code don't have the feeling we can make it fail
quickly/non-blocking - and for COLO you need to be absolutely sure you
nail every case, so you'd some how have to audit the whole lot, and keep
watching it.

(and thank you for taking your time to do this!)

Dave


> > > These patches introduce the new 'yank' out-of-band qmp command to recover from
> > > these kinds of hangs. The different subsystems register callbacks which get
> > > executed with the yank command. For example the callback can shutdown() a
> > > socket. This is intended for the colo use-case, but it can be used for other
> > > things too of course.  
> > 
> > IIUC, invoking the "yank" command unconditionally kills every single
> > network connection in QEMU that has registered with the "yank" subsystem.
> > IMHO this is way too big of a hammer, even if we accept there are bugs in
> > QEMU not handling stalled networking well.
> > 
> > eg if a chardev hangs QEMU, and we tear down everything, killing the NBD
> > connection used for the guest disk, we needlessly break I/O.
> 
> Yeah, these patches are intended to solve the problems with the colo use-case where all external connections (migration, chardevs, nbd) are just for replication. In other use-cases you'd enable the yank feature only on the non-essential connections.
> 
> > eg doing this in the chardev backend is not desirable, because the bugs
> > with hanging QEMU are typically caused by the way the frontend device
> > uses the chardev blocking I/O calls, instead of non-blocking I/O calls.
> > 
> > 
> > Regards,
> > Daniel
> 


--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-11 15:46       ` Dr. David Alan Gilbert
@ 2020-05-12  9:32         ` Lukas Straub
  2020-05-12  9:43           ` Daniel P. Berrangé
  0 siblings, 1 reply; 52+ messages in thread
From: Lukas Straub @ 2020-05-12  9:32 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz, Paolo Bonzini,
	Marc-André Lureau

[-- Attachment #1: Type: text/plain, Size: 1507 bytes --]

On Mon, 11 May 2020 16:46:45 +0100
"Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:

> * Daniel P. BerrangÃ© (berrange@redhat.com) wrote: 
> > ...
> > That way if QEMU does get stuck, you can start by tearing down the
> > least distruptive channel. eg try tearing down the migration connection
> > first (which shouldn't negatively impact the guest), and only if that
> > doesn't work then, move on to tear down the NBD connection (which risks
> > data loss)  
> 
> I wonder if a different way would be to make all network connections
> register with yank, but then make yank take a list of connections to
> shutdown(2).

Good Idea. We could name the connections (/yank callbacks) in the form "nbd:<node-name>", "chardev:<chardev-name>" and "migration" (and add "netdev:...", etc. in the future). Then make yank take a list of connection names as you suggest and silently ignore connections that don't exist. And maybe even add a 'query-yank' oob command returning a list of registered connections so the management application can do pattern matching if it wants.

Comments?

Regards,
Lukas Straub

> Dave
> 
> > Regards,
> > Daniel
> > -- 
> > |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> > |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> > |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|  
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-12  9:32         ` Lukas Straub
@ 2020-05-12  9:43           ` Daniel P. Berrangé
  2020-05-12 18:58             ` Dr. David Alan Gilbert
                               ` (2 more replies)
  0 siblings, 3 replies; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-12  9:43 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini, Dr. David Alan Gilbert

On Tue, May 12, 2020 at 11:32:06AM +0200, Lukas Straub wrote:
> On Mon, 11 May 2020 16:46:45 +0100
> "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> 
> > * Daniel P. BerrangÃ© (berrange@redhat.com) wrote: 
> > > ...
> > > That way if QEMU does get stuck, you can start by tearing down the
> > > least distruptive channel. eg try tearing down the migration connection
> > > first (which shouldn't negatively impact the guest), and only if that
> > > doesn't work then, move on to tear down the NBD connection (which risks
> > > data loss)  
> > 
> > I wonder if a different way would be to make all network connections
> > register with yank, but then make yank take a list of connections to
> > shutdown(2).
> 
> Good Idea. We could name the connections (/yank callbacks) in the
> form "nbd:<node-name>", "chardev:<chardev-name>" and "migration"
> (and add "netdev:...", etc. in the future). Then make yank take a
> list of connection names as you suggest and silently ignore connections
> that don't exist. And maybe even add a 'query-yank' oob command returning
> a list of registered connections so the management application can do
> pattern matching if it wants.

Yes, that would make the yank command much more flexible in how it can
be used.

As an alternative to using formatted strings like this, it could be
modelled more explicitly in QAPI

  { 'struct':  'YankChannels',
    'data': { 'chardev': [ 'string' ],
              'nbd': ['string'],
	      'migration': bool } }

In this example, 'chardev' would accept a list of chardev IDs which
have it enabled, 'nbd' would accept a list of block node IDs which
have it enabled, and migration is a singleton on/off.

The benefit of this modelling is that you can introspect QEMU
to discover what classes of channels support being yanked by
this QEMU build, as well as what instances are configured to
be yanked. ie you can distinguish between a QEMU that doesn't
support yanking network devices, from a QEMU that does support
yanking network devices, but doesn't have it enabled for any
network device instances.

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-12  9:43           ` Daniel P. Berrangé
@ 2020-05-12 18:58             ` Dr. David Alan Gilbert
  2020-05-13  8:41               ` Daniel P. Berrangé
  2020-05-12 19:42             ` Lukas Straub
  2020-05-13 10:32             ` Kevin Wolf
  2 siblings, 1 reply; 52+ messages in thread
From: Dr. David Alan Gilbert @ 2020-05-12 18:58 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, Lukas Straub, qemu-block, Juan Quintela, qemu-devel,
	Max Reitz, Marc-André Lureau, Paolo Bonzini

* Daniel P. BerrangÃ© (berrange@redhat.com) wrote:
> On Tue, May 12, 2020 at 11:32:06AM +0200, Lukas Straub wrote:
> > On Mon, 11 May 2020 16:46:45 +0100
> > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > 
> > > * Daniel P. BerrangÃƒÂ© (berrange@redhat.com) wrote: 
> > > > ...
> > > > That way if QEMU does get stuck, you can start by tearing down the
> > > > least distruptive channel. eg try tearing down the migration connection
> > > > first (which shouldn't negatively impact the guest), and only if that
> > > > doesn't work then, move on to tear down the NBD connection (which risks
> > > > data loss)  
> > > 
> > > I wonder if a different way would be to make all network connections
> > > register with yank, but then make yank take a list of connections to
> > > shutdown(2).
> > 
> > Good Idea. We could name the connections (/yank callbacks) in the
> > form "nbd:<node-name>", "chardev:<chardev-name>" and "migration"
> > (and add "netdev:...", etc. in the future). Then make yank take a
> > list of connection names as you suggest and silently ignore connections
> > that don't exist. And maybe even add a 'query-yank' oob command returning
> > a list of registered connections so the management application can do
> > pattern matching if it wants.
> 
> Yes, that would make the yank command much more flexible in how it can
> be used.
> 
> As an alternative to using formatted strings like this, it could be
> modelled more explicitly in QAPI
> 
>   { 'struct':  'YankChannels',
>     'data': { 'chardev': [ 'string' ],
>               'nbd': ['string'],
> 	      'migration': bool } }
> 
> In this example, 'chardev' would accept a list of chardev IDs which
> have it enabled, 'nbd' would accept a list of block node IDs which
> have it enabled, and migration is a singleton on/off.

Do we already have a QOM object name for each of these things?
Is that nbd/blockdevice unique - i.e. can you have multiple nbd clients
on the same node?

> The benefit of this modelling is that you can introspect QEMU
> to discover what classes of channels support being yanked by
> this QEMU build, as well as what instances are configured to
> be yanked. ie you can distinguish between a QEMU that doesn't
> support yanking network devices, from a QEMU that does support
> yanking network devices, but doesn't have it enabled for any
> network device instances.

What do we need to make it introspectable like that?

Dave

> Regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-12  9:43           ` Daniel P. Berrangé
  2020-05-12 18:58             ` Dr. David Alan Gilbert
@ 2020-05-12 19:42             ` Lukas Straub
  2020-05-13 10:32             ` Kevin Wolf
  2 siblings, 0 replies; 52+ messages in thread
From: Lukas Straub @ 2020-05-12 19:42 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini, Dr. David Alan Gilbert

[-- Attachment #1: Type: text/plain, Size: 1743 bytes --]

On Tue, 12 May 2020 10:43:37 +0100
Daniel P. Berrangé <berrange@redhat.com> wrote:

> On Tue, May 12, 2020 at 11:32:06AM +0200, Lukas Straub wrote:
> > ... 
> > 
> > Good Idea. We could name the connections (/yank callbacks) in the
> > form "nbd:<node-name>", "chardev:<chardev-name>" and "migration"
> > (and add "netdev:...", etc. in the future). Then make yank take a
> > list of connection names as you suggest and silently ignore connections
> > that don't exist. And maybe even add a 'query-yank' oob command returning
> > a list of registered connections so the management application can do
> > pattern matching if it wants.  
> 
> Yes, that would make the yank command much more flexible in how it can
> be used.
> 
> As an alternative to using formatted strings like this, it could be
> modelled more explicitly in QAPI
> 
>   { 'struct':  'YankChannels',
>     'data': { 'chardev': [ 'string' ],
>               'nbd': ['string'],
> 	      'migration': bool } }
> 
> In this example, 'chardev' would accept a list of chardev IDs which
> have it enabled, 'nbd' would accept a list of block node IDs which
> have it enabled, and migration is a singleton on/off.

With the new command, the yank feature can then be enabled unconditionally on the instances.

> The benefit of this modelling is that you can introspect QEMU
> to discover what classes of channels support being yanked by
> this QEMU build, as well as what instances are configured to
> be yanked. ie you can distinguish between a QEMU that doesn't
> support yanking network devices, from a QEMU that does support
> yanking network devices, but doesn't have it enabled for any
> network device instances.
> 
> Regards,
> Daniel


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-12 18:58             ` Dr. David Alan Gilbert
@ 2020-05-13  8:41               ` Daniel P. Berrangé
  0 siblings, 0 replies; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-13  8:41 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Kevin Wolf, Lukas Straub, qemu-block, Juan Quintela, qemu-devel,
	Max Reitz, Paolo Bonzini, Marc-André Lureau

On Tue, May 12, 2020 at 07:58:17PM +0100, Dr. David Alan Gilbert wrote:
> * Daniel P. BerrangÃƒÂ© (berrange@redhat.com) wrote:
> > On Tue, May 12, 2020 at 11:32:06AM +0200, Lukas Straub wrote:
> > > On Mon, 11 May 2020 16:46:45 +0100
> > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > 
> > > > * Daniel P. BerrangÃƒÆ’Ã‚Â© (berrange@redhat.com) wrote: 
> > > > > ...
> > > > > That way if QEMU does get stuck, you can start by tearing down the
> > > > > least distruptive channel. eg try tearing down the migration connection
> > > > > first (which shouldn't negatively impact the guest), and only if that
> > > > > doesn't work then, move on to tear down the NBD connection (which risks
> > > > > data loss)  
> > > > 
> > > > I wonder if a different way would be to make all network connections
> > > > register with yank, but then make yank take a list of connections to
> > > > shutdown(2).
> > > 
> > > Good Idea. We could name the connections (/yank callbacks) in the
> > > form "nbd:<node-name>", "chardev:<chardev-name>" and "migration"
> > > (and add "netdev:...", etc. in the future). Then make yank take a
> > > list of connection names as you suggest and silently ignore connections
> > > that don't exist. And maybe even add a 'query-yank' oob command returning
> > > a list of registered connections so the management application can do
> > > pattern matching if it wants.
> > 
> > Yes, that would make the yank command much more flexible in how it can
> > be used.
> > 
> > As an alternative to using formatted strings like this, it could be
> > modelled more explicitly in QAPI
> > 
> >   { 'struct':  'YankChannels',
> >     'data': { 'chardev': [ 'string' ],
> >               'nbd': ['string'],
> > 	      'migration': bool } }
> > 
> > In this example, 'chardev' would accept a list of chardev IDs which
> > have it enabled, 'nbd' would accept a list of block node IDs which
> > have it enabled, and migration is a singleton on/off.
> 
> Do we already have a QOM object name for each of these things?
> Is that nbd/blockdevice unique - i.e. can you have multiple nbd clients
> on the same node?
> 
> > The benefit of this modelling is that you can introspect QEMU
> > to discover what classes of channels support being yanked by
> > this QEMU build, as well as what instances are configured to
> > be yanked. ie you can distinguish between a QEMU that doesn't
> > support yanking network devices, from a QEMU that does support
> > yanking network devices, but doesn't have it enabled for any
> > network device instances.
> 
> What do we need to make it introspectable like that?

The model I describe above would work, because you can introspect the
QAPI schema to see what fields are in the "YankChannels" struct. So
if we added a "nic" field later, apps can discover it.

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-12  9:43           ` Daniel P. Berrangé
  2020-05-12 18:58             ` Dr. David Alan Gilbert
  2020-05-12 19:42             ` Lukas Straub
@ 2020-05-13 10:32             ` Kevin Wolf
  2020-05-13 10:53               ` Dr. David Alan Gilbert
  2 siblings, 1 reply; 52+ messages in thread
From: Kevin Wolf @ 2020-05-13 10:32 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Lukas Straub, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini, Dr. David Alan Gilbert

Am 12.05.2020 um 11:43 hat Daniel P. BerrangÃ© geschrieben:
> On Tue, May 12, 2020 at 11:32:06AM +0200, Lukas Straub wrote:
> > On Mon, 11 May 2020 16:46:45 +0100
> > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > 
> > > * Daniel P. BerrangÃƒÂ© (berrange@redhat.com) wrote: 
> > > > ...
> > > > That way if QEMU does get stuck, you can start by tearing down the
> > > > least distruptive channel. eg try tearing down the migration connection
> > > > first (which shouldn't negatively impact the guest), and only if that
> > > > doesn't work then, move on to tear down the NBD connection (which risks
> > > > data loss)  
> > > 
> > > I wonder if a different way would be to make all network connections
> > > register with yank, but then make yank take a list of connections to
> > > shutdown(2).
> > 
> > Good Idea. We could name the connections (/yank callbacks) in the
> > form "nbd:<node-name>", "chardev:<chardev-name>" and "migration"
> > (and add "netdev:...", etc. in the future). Then make yank take a
> > list of connection names as you suggest and silently ignore connections
> > that don't exist. And maybe even add a 'query-yank' oob command returning
> > a list of registered connections so the management application can do
> > pattern matching if it wants.

I'm generally not a big fan of silently ignoring things. Is there a
specific requirement to do it in this case, or can management
applications be expected to know which connections exist?

> Yes, that would make the yank command much more flexible in how it can
> be used.
> 
> As an alternative to using formatted strings like this, it could be
> modelled more explicitly in QAPI
> 
>   { 'struct':  'YankChannels',
>     'data': { 'chardev': [ 'string' ],
>               'nbd': ['string'],
> 	      'migration': bool } }
> 
> In this example, 'chardev' would accept a list of chardev IDs which
> have it enabled, 'nbd' would accept a list of block node IDs which
> have it enabled, and migration is a singleton on/off.

Of course, it also means that the yank code needs to know about every
single object that supports the operation, whereas if you only have
strings, the objects could keep registering their connection with a
generic function like yank_register_function() in this version.

I'm not sure if the additional complexity is worth the benefits.

> The benefit of this modelling is that you can introspect QEMU
> to discover what classes of channels support being yanked by
> this QEMU build, as well as what instances are configured to
> be yanked. ie you can distinguish between a QEMU that doesn't
> support yanking network devices, from a QEMU that does support
> yanking network devices, but doesn't have it enabled for any
> network device instances.

This is true, though I think Lukas' suggestion with query-yank should be
as good in practice (you can't check before creating the NBD device
then, but would you actually want to do this?).

And if all else fails, we can still add a few more feature flags to the
schema...

Kevin



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 10:32             ` Kevin Wolf
@ 2020-05-13 10:53               ` Dr. David Alan Gilbert
  2020-05-13 11:13                 ` Kevin Wolf
  0 siblings, 1 reply; 52+ messages in thread
From: Dr. David Alan Gilbert @ 2020-05-13 10:53 UTC (permalink / raw)
  To: Kevin Wolf
  Cc: Lukas Straub, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini

* Kevin Wolf (kwolf@redhat.com) wrote:
> Am 12.05.2020 um 11:43 hat Daniel P. BerrangÃ© geschrieben:
> > On Tue, May 12, 2020 at 11:32:06AM +0200, Lukas Straub wrote:
> > > On Mon, 11 May 2020 16:46:45 +0100
> > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > 
> > > > * Daniel P. BerrangÃƒÂ© (berrange@redhat.com) wrote: 
> > > > > ...
> > > > > That way if QEMU does get stuck, you can start by tearing down the
> > > > > least distruptive channel. eg try tearing down the migration connection
> > > > > first (which shouldn't negatively impact the guest), and only if that
> > > > > doesn't work then, move on to tear down the NBD connection (which risks
> > > > > data loss)  
> > > > 
> > > > I wonder if a different way would be to make all network connections
> > > > register with yank, but then make yank take a list of connections to
> > > > shutdown(2).
> > > 
> > > Good Idea. We could name the connections (/yank callbacks) in the
> > > form "nbd:<node-name>", "chardev:<chardev-name>" and "migration"
> > > (and add "netdev:...", etc. in the future). Then make yank take a
> > > list of connection names as you suggest and silently ignore connections
> > > that don't exist. And maybe even add a 'query-yank' oob command returning
> > > a list of registered connections so the management application can do
> > > pattern matching if it wants.
> 
> I'm generally not a big fan of silently ignoring things. Is there a
> specific requirement to do it in this case, or can management
> applications be expected to know which connections exist?
> 
> > Yes, that would make the yank command much more flexible in how it can
> > be used.
> > 
> > As an alternative to using formatted strings like this, it could be
> > modelled more explicitly in QAPI
> > 
> >   { 'struct':  'YankChannels',
> >     'data': { 'chardev': [ 'string' ],
> >               'nbd': ['string'],
> > 	      'migration': bool } }
> > 
> > In this example, 'chardev' would accept a list of chardev IDs which
> > have it enabled, 'nbd' would accept a list of block node IDs which
> > have it enabled, and migration is a singleton on/off.
> 
> Of course, it also means that the yank code needs to know about every
> single object that supports the operation, whereas if you only have
> strings, the objects could keep registering their connection with a
> generic function like yank_register_function() in this version.
> 
> I'm not sure if the additional complexity is worth the benefits.

I tend to agree; although we do have to ensure we either use an existing
naming scheme (e.g. QOM object names?) or make sure we've got a well
defined list of prefixes.

Dave

> 
> > The benefit of this modelling is that you can introspect QEMU
> > to discover what classes of channels support being yanked by
> > this QEMU build, as well as what instances are configured to
> > be yanked. ie you can distinguish between a QEMU that doesn't
> > support yanking network devices, from a QEMU that does support
> > yanking network devices, but doesn't have it enabled for any
> > network device instances.
> 
> This is true, though I think Lukas' suggestion with query-yank should be
> as good in practice (you can't check before creating the NBD device
> then, but would you actually want to do this?).
> 
> And if all else fails, we can still add a few more feature flags to the
> schema...
> 
> Kevin
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 10:53               ` Dr. David Alan Gilbert
@ 2020-05-13 11:13                 ` Kevin Wolf
  2020-05-13 11:26                   ` Daniel P. Berrangé
                                     ` (2 more replies)
  0 siblings, 3 replies; 52+ messages in thread
From: Kevin Wolf @ 2020-05-13 11:13 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Lukas Straub, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini

Am 13.05.2020 um 12:53 hat Dr. David Alan Gilbert geschrieben:
> * Kevin Wolf (kwolf@redhat.com) wrote:
> > Am 12.05.2020 um 11:43 hat Daniel P. BerrangÃ© geschrieben:
> > > On Tue, May 12, 2020 at 11:32:06AM +0200, Lukas Straub wrote:
> > > > On Mon, 11 May 2020 16:46:45 +0100
> > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > 
> > > > > * Daniel P. BerrangÃƒÂ© (berrange@redhat.com) wrote: 
> > > > > > ...
> > > > > > That way if QEMU does get stuck, you can start by tearing down the
> > > > > > least distruptive channel. eg try tearing down the migration connection
> > > > > > first (which shouldn't negatively impact the guest), and only if that
> > > > > > doesn't work then, move on to tear down the NBD connection (which risks
> > > > > > data loss)  
> > > > > 
> > > > > I wonder if a different way would be to make all network connections
> > > > > register with yank, but then make yank take a list of connections to
> > > > > shutdown(2).
> > > > 
> > > > Good Idea. We could name the connections (/yank callbacks) in the
> > > > form "nbd:<node-name>", "chardev:<chardev-name>" and "migration"
> > > > (and add "netdev:...", etc. in the future). Then make yank take a
> > > > list of connection names as you suggest and silently ignore connections
> > > > that don't exist. And maybe even add a 'query-yank' oob command returning
> > > > a list of registered connections so the management application can do
> > > > pattern matching if it wants.
> > 
> > I'm generally not a big fan of silently ignoring things. Is there a
> > specific requirement to do it in this case, or can management
> > applications be expected to know which connections exist?
> > 
> > > Yes, that would make the yank command much more flexible in how it can
> > > be used.
> > > 
> > > As an alternative to using formatted strings like this, it could be
> > > modelled more explicitly in QAPI
> > > 
> > >   { 'struct':  'YankChannels',
> > >     'data': { 'chardev': [ 'string' ],
> > >               'nbd': ['string'],
> > > 	      'migration': bool } }
> > > 
> > > In this example, 'chardev' would accept a list of chardev IDs which
> > > have it enabled, 'nbd' would accept a list of block node IDs which
> > > have it enabled, and migration is a singleton on/off.
> > 
> > Of course, it also means that the yank code needs to know about every
> > single object that supports the operation, whereas if you only have
> > strings, the objects could keep registering their connection with a
> > generic function like yank_register_function() in this version.
> > 
> > I'm not sure if the additional complexity is worth the benefits.
> 
> I tend to agree; although we do have to ensure we either use an existing
> naming scheme (e.g. QOM object names?) or make sure we've got a well
> defined list of prefixes.

Not everything that has a network connection is a QOM object (in fact,
neither migration nor chardev nor nbd are QOM objects).

I guess it would be nice to have a single namespace for everything in
QEMU, but the reality is that we have a few separate ones. As long as we
consistently add a prefix that identifies the namespace in question, I
think that would work.

This means that if we're using node-name to identify the NBD connection,
the namespace should be 'block' rather than 'nbd'.

One more thing to consider is, what if a single object has multiple
connections? In the case of node-names, we have a limited set of allowed
characters, so we can use one of the remaining characters as a separator
and then suffix a counter. In other places, the identifier isn't
restricted, so suffixing doesn't work. Maybe prefixing does, but it
would have to be there from the beginning then.

And another thing: Do we really want to document this as limited to
network connections? Another common cause of hangs is when you have
image files on an NFS mount and the connection goes away. Of course, in
the end this is still networking, but inside of QEMU it looks like
accessing any other file. I'm not sure that we'll allow yanking access
to image files anytime soon, but it might not hurt to keep it at the
back of our mind as a potential option we might want the design to
allow.

Kevin



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 11:13                 ` Kevin Wolf
@ 2020-05-13 11:26                   ` Daniel P. Berrangé
  2020-05-13 11:58                     ` Paolo Bonzini
  2020-05-13 12:18                     ` Kevin Wolf
  2020-05-13 12:56                   ` Dr. David Alan Gilbert
  2020-09-01 10:35                   ` Markus Armbruster
  2 siblings, 2 replies; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-13 11:26 UTC (permalink / raw)
  To: Kevin Wolf
  Cc: Lukas Straub, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini, Dr. David Alan Gilbert

On Wed, May 13, 2020 at 01:13:20PM +0200, Kevin Wolf wrote:
> Am 13.05.2020 um 12:53 hat Dr. David Alan Gilbert geschrieben:
> > * Kevin Wolf (kwolf@redhat.com) wrote:
> > > Am 12.05.2020 um 11:43 hat Daniel P. Berrangé geschrieben:
> > > > On Tue, May 12, 2020 at 11:32:06AM +0200, Lukas Straub wrote:
> > > > > On Mon, 11 May 2020 16:46:45 +0100
> > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > > 
> > > > > > * Daniel P. BerrangÃ© (berrange@redhat.com) wrote: 
> > > > > > > ...
> > > > > > > That way if QEMU does get stuck, you can start by tearing down the
> > > > > > > least distruptive channel. eg try tearing down the migration connection
> > > > > > > first (which shouldn't negatively impact the guest), and only if that
> > > > > > > doesn't work then, move on to tear down the NBD connection (which risks
> > > > > > > data loss)  
> > > > > > 
> > > > > > I wonder if a different way would be to make all network connections
> > > > > > register with yank, but then make yank take a list of connections to
> > > > > > shutdown(2).
> > > > > 
> > > > > Good Idea. We could name the connections (/yank callbacks) in the
> > > > > form "nbd:<node-name>", "chardev:<chardev-name>" and "migration"
> > > > > (and add "netdev:...", etc. in the future). Then make yank take a
> > > > > list of connection names as you suggest and silently ignore connections
> > > > > that don't exist. And maybe even add a 'query-yank' oob command returning
> > > > > a list of registered connections so the management application can do
> > > > > pattern matching if it wants.
> > > 
> > > I'm generally not a big fan of silently ignoring things. Is there a
> > > specific requirement to do it in this case, or can management
> > > applications be expected to know which connections exist?
> > > 
> > > > Yes, that would make the yank command much more flexible in how it can
> > > > be used.
> > > > 
> > > > As an alternative to using formatted strings like this, it could be
> > > > modelled more explicitly in QAPI
> > > > 
> > > >   { 'struct':  'YankChannels',
> > > >     'data': { 'chardev': [ 'string' ],
> > > >               'nbd': ['string'],
> > > > 	      'migration': bool } }
> > > > 
> > > > In this example, 'chardev' would accept a list of chardev IDs which
> > > > have it enabled, 'nbd' would accept a list of block node IDs which
> > > > have it enabled, and migration is a singleton on/off.
> > > 
> > > Of course, it also means that the yank code needs to know about every
> > > single object that supports the operation, whereas if you only have
> > > strings, the objects could keep registering their connection with a
> > > generic function like yank_register_function() in this version.
> > > 
> > > I'm not sure if the additional complexity is worth the benefits.
> > 
> > I tend to agree; although we do have to ensure we either use an existing
> > naming scheme (e.g. QOM object names?) or make sure we've got a well
> > defined list of prefixes.
> 
> Not everything that has a network connection is a QOM object (in fact,
> neither migration nor chardev nor nbd are QOM objects).
> 
> I guess it would be nice to have a single namespace for everything in
> QEMU, but the reality is that we have a few separate ones. As long as we
> consistently add a prefix that identifies the namespace in question, I
> think that would work.
> 
> This means that if we're using node-name to identify the NBD connection,
> the namespace should be 'block' rather than 'nbd'.
> 
> One more thing to consider is, what if a single object has multiple
> connections? In the case of node-names, we have a limited set of allowed
> characters, so we can use one of the remaining characters as a separator
> and then suffix a counter. In other places, the identifier isn't
> restricted, so suffixing doesn't work. Maybe prefixing does, but it
> would have to be there from the beginning then.
> 
> And another thing: Do we really want to document this as limited to
> network connections? Another common cause of hangs is when you have
> image files on an NFS mount and the connection goes away. Of course, in
> the end this is still networking, but inside of QEMU it looks like
> accessing any other file. I'm not sure that we'll allow yanking access
> to image files anytime soon, but it might not hurt to keep it at the
> back of our mind as a potential option we might want the design to
> allow.

Are you referring to the in-kernel NFS client hangs here ?  AFAIK, it is
impossible to do anything to get out of those hangs from userspace, because
the thread is stuck in an uninterruptable sleep in kernel space.

If using the in-QEMU NFS client, then there is a network connection that
can be yanked just like the NBD client.

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 11:26                   ` Daniel P. Berrangé
@ 2020-05-13 11:58                     ` Paolo Bonzini
  2020-05-13 12:25                       ` Dr. David Alan Gilbert
  2020-05-13 12:32                       ` Kevin Wolf
  2020-05-13 12:18                     ` Kevin Wolf
  1 sibling, 2 replies; 52+ messages in thread
From: Paolo Bonzini @ 2020-05-13 11:58 UTC (permalink / raw)
  To: Daniel P. Berrangé, Kevin Wolf
  Cc: Lukas Straub, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Dr. David Alan Gilbert

On 13/05/20 13:26, Daniel P. Berrangé wrote:
> Are you referring to the in-kernel NFS client hangs here ?  AFAIK, it is
> impossible to do anything to get out of those hangs from userspace, because
> the thread is stuck in an uninterruptable sleep in kernel space.
> 
> If using the in-QEMU NFS client, then there is a network connection that
> can be yanked just like the NBD client.

But it's a bad idea to yank it (and also the NBD client) because you're
not sure which wites have made it to the server (and to the medium) and
which haven't.

Effectively, the in-QEMU NFS client and NBD client are always operating
in "soft" mode, but we should always treat that as a bug (which cannot
be fixed) and not a feature for read-write images.

Paolo



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 11:26                   ` Daniel P. Berrangé
  2020-05-13 11:58                     ` Paolo Bonzini
@ 2020-05-13 12:18                     ` Kevin Wolf
  1 sibling, 0 replies; 52+ messages in thread
From: Kevin Wolf @ 2020-05-13 12:18 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Lukas Straub, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini, Dr. David Alan Gilbert

Am 13.05.2020 um 13:26 hat Daniel P. BerrangÃ© geschrieben:
> On Wed, May 13, 2020 at 01:13:20PM +0200, Kevin Wolf wrote:
> > Am 13.05.2020 um 12:53 hat Dr. David Alan Gilbert geschrieben:
> > > * Kevin Wolf (kwolf@redhat.com) wrote:
> > > > Am 12.05.2020 um 11:43 hat Daniel P. BerrangÃ© geschrieben:
> > > > > On Tue, May 12, 2020 at 11:32:06AM +0200, Lukas Straub wrote:
> > > > > > On Mon, 11 May 2020 16:46:45 +0100
> > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > > > 
> > > > > > > * Daniel P. BerrangÃƒÂ© (berrange@redhat.com) wrote: 
> > > > > > > > ...
> > > > > > > > That way if QEMU does get stuck, you can start by tearing down the
> > > > > > > > least distruptive channel. eg try tearing down the migration connection
> > > > > > > > first (which shouldn't negatively impact the guest), and only if that
> > > > > > > > doesn't work then, move on to tear down the NBD connection (which risks
> > > > > > > > data loss)  
> > > > > > > 
> > > > > > > I wonder if a different way would be to make all network connections
> > > > > > > register with yank, but then make yank take a list of connections to
> > > > > > > shutdown(2).
> > > > > > 
> > > > > > Good Idea. We could name the connections (/yank callbacks) in the
> > > > > > form "nbd:<node-name>", "chardev:<chardev-name>" and "migration"
> > > > > > (and add "netdev:...", etc. in the future). Then make yank take a
> > > > > > list of connection names as you suggest and silently ignore connections
> > > > > > that don't exist. And maybe even add a 'query-yank' oob command returning
> > > > > > a list of registered connections so the management application can do
> > > > > > pattern matching if it wants.
> > > > 
> > > > I'm generally not a big fan of silently ignoring things. Is there a
> > > > specific requirement to do it in this case, or can management
> > > > applications be expected to know which connections exist?
> > > > 
> > > > > Yes, that would make the yank command much more flexible in how it can
> > > > > be used.
> > > > > 
> > > > > As an alternative to using formatted strings like this, it could be
> > > > > modelled more explicitly in QAPI
> > > > > 
> > > > >   { 'struct':  'YankChannels',
> > > > >     'data': { 'chardev': [ 'string' ],
> > > > >               'nbd': ['string'],
> > > > > 	      'migration': bool } }
> > > > > 
> > > > > In this example, 'chardev' would accept a list of chardev IDs which
> > > > > have it enabled, 'nbd' would accept a list of block node IDs which
> > > > > have it enabled, and migration is a singleton on/off.
> > > > 
> > > > Of course, it also means that the yank code needs to know about every
> > > > single object that supports the operation, whereas if you only have
> > > > strings, the objects could keep registering their connection with a
> > > > generic function like yank_register_function() in this version.
> > > > 
> > > > I'm not sure if the additional complexity is worth the benefits.
> > > 
> > > I tend to agree; although we do have to ensure we either use an existing
> > > naming scheme (e.g. QOM object names?) or make sure we've got a well
> > > defined list of prefixes.
> > 
> > Not everything that has a network connection is a QOM object (in fact,
> > neither migration nor chardev nor nbd are QOM objects).
> > 
> > I guess it would be nice to have a single namespace for everything in
> > QEMU, but the reality is that we have a few separate ones. As long as we
> > consistently add a prefix that identifies the namespace in question, I
> > think that would work.
> > 
> > This means that if we're using node-name to identify the NBD connection,
> > the namespace should be 'block' rather than 'nbd'.
> > 
> > One more thing to consider is, what if a single object has multiple
> > connections? In the case of node-names, we have a limited set of allowed
> > characters, so we can use one of the remaining characters as a separator
> > and then suffix a counter. In other places, the identifier isn't
> > restricted, so suffixing doesn't work. Maybe prefixing does, but it
> > would have to be there from the beginning then.
> > 
> > And another thing: Do we really want to document this as limited to
> > network connections? Another common cause of hangs is when you have
> > image files on an NFS mount and the connection goes away. Of course, in
> > the end this is still networking, but inside of QEMU it looks like
> > accessing any other file. I'm not sure that we'll allow yanking access
> > to image files anytime soon, but it might not hurt to keep it at the
> > back of our mind as a potential option we might want the design to
> > allow.
> 
> Are you referring to the in-kernel NFS client hangs here ?  AFAIK, it is
> impossible to do anything to get out of those hangs from userspace, because
> the thread is stuck in an uninterruptable sleep in kernel space.

I'm not sure if it's always uninterruptible, but yes, in general you
would be sacrificing the block node and some of its worker threads to
get the VM unstuck.

The next thing you should probably do is migrate off to another QEMU
instance without a hanging thread. But at least it would let you migrate
off instead of getting stuck while trying to wait for all running
requests to complete.

> If using the in-QEMU NFS client, then there is a network connection
> that can be yanked just like the NBD client.

Yes, that's the same case as NBD.

Kevin



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 11:58                     ` Paolo Bonzini
@ 2020-05-13 12:25                       ` Dr. David Alan Gilbert
  2020-05-13 12:32                       ` Kevin Wolf
  1 sibling, 0 replies; 52+ messages in thread
From: Dr. David Alan Gilbert @ 2020-05-13 12:25 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Kevin Wolf, Lukas Straub, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau

* Paolo Bonzini (pbonzini@redhat.com) wrote:
> On 13/05/20 13:26, Daniel P. BerrangÃ© wrote:
> > Are you referring to the in-kernel NFS client hangs here ?  AFAIK, it is
> > impossible to do anything to get out of those hangs from userspace, because
> > the thread is stuck in an uninterruptable sleep in kernel space.
> > 
> > If using the in-QEMU NFS client, then there is a network connection that
> > can be yanked just like the NBD client.
> 
> But it's a bad idea to yank it (and also the NBD client) because you're
> not sure which wites have made it to the server (and to the medium) and
> which haven't.

No, that's OK - if you look at the COLO case, and some other cases,
you've got a dead storage device but your redundant pair might be OK;
so it's OK to yank it.
Other similar storage cases are trying to migrate a VM that has one dead
disk, even if you know and accept it's dead and unresponding, you often
can't kill it off if the device is hung.

> Effectively, the in-QEMU NFS client and NBD client are always operating
> in "soft" mode, but we should always treat that as a bug (which cannot
> be fixed) and not a feature for read-write images.
Dave

> 
> Paolo
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 11:58                     ` Paolo Bonzini
  2020-05-13 12:25                       ` Dr. David Alan Gilbert
@ 2020-05-13 12:32                       ` Kevin Wolf
  1 sibling, 0 replies; 52+ messages in thread
From: Kevin Wolf @ 2020-05-13 12:32 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Lukas Straub, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Dr. David Alan Gilbert,
	Marc-André Lureau, Max Reitz

Am 13.05.2020 um 13:58 hat Paolo Bonzini geschrieben:
> On 13/05/20 13:26, Daniel P. BerrangÃ© wrote:
> > Are you referring to the in-kernel NFS client hangs here ?  AFAIK, it is
> > impossible to do anything to get out of those hangs from userspace, because
> > the thread is stuck in an uninterruptable sleep in kernel space.
> > 
> > If using the in-QEMU NFS client, then there is a network connection that
> > can be yanked just like the NBD client.
> 
> But it's a bad idea to yank it (and also the NBD client) because you're
> not sure which wites have made it to the server (and to the medium) and
> which haven't.

This (undefined content) is generally what guests should expect when a
write request returns an error.

> Effectively, the in-QEMU NFS client and NBD client are always operating
> in "soft" mode, but we should always treat that as a bug (which cannot
> be fixed) and not a feature for read-write images.

It certainly means that you can't continue as if nothing had happened.

However, writing to the same area again (and successfully) restores a
valid state, so with werror=stop you would get into a state where you
can later reconnect and continue where you stopped, but the monitor
would stay responsive during that time.

Or if the disk isn't critical for the guest, you could replace the NBD
block node with a node that always returns I/O errors, so the guest
would see a broken disk (actually, just reporting the errors from
yanking to the guest might be enough to make it give up on the disk and
stop sending new requests). Then you can resume the VM right away.

Kevin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 11:13                 ` Kevin Wolf
  2020-05-13 11:26                   ` Daniel P. Berrangé
@ 2020-05-13 12:56                   ` Dr. David Alan Gilbert
  2020-05-13 13:08                     ` Daniel P. Berrangé
  2020-09-01 10:35                   ` Markus Armbruster
  2 siblings, 1 reply; 52+ messages in thread
From: Dr. David Alan Gilbert @ 2020-05-13 12:56 UTC (permalink / raw)
  To: Kevin Wolf
  Cc: Lukas Straub, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini

* Kevin Wolf (kwolf@redhat.com) wrote:
> Am 13.05.2020 um 12:53 hat Dr. David Alan Gilbert geschrieben:
> > * Kevin Wolf (kwolf@redhat.com) wrote:
> > > Am 12.05.2020 um 11:43 hat Daniel P. BerrangÃ© geschrieben:
> > > > On Tue, May 12, 2020 at 11:32:06AM +0200, Lukas Straub wrote:
> > > > > On Mon, 11 May 2020 16:46:45 +0100
> > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > > 
> > > > > > * Daniel P. BerrangÃƒÂ© (berrange@redhat.com) wrote: 
> > > > > > > ...
> > > > > > > That way if QEMU does get stuck, you can start by tearing down the
> > > > > > > least distruptive channel. eg try tearing down the migration connection
> > > > > > > first (which shouldn't negatively impact the guest), and only if that
> > > > > > > doesn't work then, move on to tear down the NBD connection (which risks
> > > > > > > data loss)  
> > > > > > 
> > > > > > I wonder if a different way would be to make all network connections
> > > > > > register with yank, but then make yank take a list of connections to
> > > > > > shutdown(2).
> > > > > 
> > > > > Good Idea. We could name the connections (/yank callbacks) in the
> > > > > form "nbd:<node-name>", "chardev:<chardev-name>" and "migration"
> > > > > (and add "netdev:...", etc. in the future). Then make yank take a
> > > > > list of connection names as you suggest and silently ignore connections
> > > > > that don't exist. And maybe even add a 'query-yank' oob command returning
> > > > > a list of registered connections so the management application can do
> > > > > pattern matching if it wants.
> > > 
> > > I'm generally not a big fan of silently ignoring things. Is there a
> > > specific requirement to do it in this case, or can management
> > > applications be expected to know which connections exist?
> > > 
> > > > Yes, that would make the yank command much more flexible in how it can
> > > > be used.
> > > > 
> > > > As an alternative to using formatted strings like this, it could be
> > > > modelled more explicitly in QAPI
> > > > 
> > > >   { 'struct':  'YankChannels',
> > > >     'data': { 'chardev': [ 'string' ],
> > > >               'nbd': ['string'],
> > > > 	      'migration': bool } }
> > > > 
> > > > In this example, 'chardev' would accept a list of chardev IDs which
> > > > have it enabled, 'nbd' would accept a list of block node IDs which
> > > > have it enabled, and migration is a singleton on/off.
> > > 
> > > Of course, it also means that the yank code needs to know about every
> > > single object that supports the operation, whereas if you only have
> > > strings, the objects could keep registering their connection with a
> > > generic function like yank_register_function() in this version.
> > > 
> > > I'm not sure if the additional complexity is worth the benefits.
> > 
> > I tend to agree; although we do have to ensure we either use an existing
> > naming scheme (e.g. QOM object names?) or make sure we've got a well
> > defined list of prefixes.
> 
> Not everything that has a network connection is a QOM object (in fact,
> neither migration nor chardev nor nbd are QOM objects).

Hmm, migrationstate is a qdev object.

> I guess it would be nice to have a single namespace for everything in
> QEMU, but the reality is that we have a few separate ones. As long as we
> consistently add a prefix that identifies the namespace in question, I
> think that would work.

> This means that if we're using node-name to identify the NBD connection,
> the namespace should be 'block' rather than 'nbd'.
> 
> One more thing to consider is, what if a single object has multiple
> connections? In the case of node-names, we have a limited set of allowed
> characters, so we can use one of the remaining characters as a separator
> and then suffix a counter. In other places, the identifier isn't
> restricted, so suffixing doesn't work. Maybe prefixing does, but it
> would have to be there from the beginning then.

Yeh I worry about whether on nbd if you can have multiple nbd
connections to one block device.

> And another thing: Do we really want to document this as limited to
> network connections? Another common cause of hangs is when you have
> image files on an NFS mount and the connection goes away. Of course, in
> the end this is still networking, but inside of QEMU it looks like
> accessing any other file. I'm not sure that we'll allow yanking access
> to image files anytime soon, but it might not hurt to keep it at the
> back of our mind as a potential option we might want the design to
> allow.

Yep.

Dave

> Kevin
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 12:56                   ` Dr. David Alan Gilbert
@ 2020-05-13 13:08                     ` Daniel P. Berrangé
  2020-05-13 13:48                       ` Dr. David Alan Gilbert
  0 siblings, 1 reply; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-13 13:08 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Kevin Wolf, Lukas Straub, qemu-block, Juan Quintela, qemu-devel,
	Max Reitz, Marc-André Lureau, Paolo Bonzini

On Wed, May 13, 2020 at 01:56:24PM +0100, Dr. David Alan Gilbert wrote:
> * Kevin Wolf (kwolf@redhat.com) wrote:
> > I guess it would be nice to have a single namespace for everything in
> > QEMU, but the reality is that we have a few separate ones. As long as we
> > consistently add a prefix that identifies the namespace in question, I
> > think that would work.
> 
> > This means that if we're using node-name to identify the NBD connection,
> > the namespace should be 'block' rather than 'nbd'.
> > 
> > One more thing to consider is, what if a single object has multiple
> > connections? In the case of node-names, we have a limited set of allowed
> > characters, so we can use one of the remaining characters as a separator
> > and then suffix a counter. In other places, the identifier isn't
> > restricted, so suffixing doesn't work. Maybe prefixing does, but it
> > would have to be there from the beginning then.
> 
> Yeh I worry about whether on nbd if you can have multiple nbd
> connections to one block device.

The kernel NBD driver now supports multiple parallel connections.
QEMU hasn't implemented this in its NBD code yet, but I certainly
see that being in scope for future.


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 13:08                     ` Daniel P. Berrangé
@ 2020-05-13 13:48                       ` Dr. David Alan Gilbert
  2020-05-13 13:57                         ` Eric Blake
  2020-05-13 14:06                         ` Kevin Wolf
  0 siblings, 2 replies; 52+ messages in thread
From: Dr. David Alan Gilbert @ 2020-05-13 13:48 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, Lukas Straub, qemu-block, Juan Quintela, qemu-devel,
	Max Reitz, Marc-André Lureau, Paolo Bonzini

* Daniel P. BerrangÃ© (berrange@redhat.com) wrote:
> On Wed, May 13, 2020 at 01:56:24PM +0100, Dr. David Alan Gilbert wrote:
> > * Kevin Wolf (kwolf@redhat.com) wrote:
> > > I guess it would be nice to have a single namespace for everything in
> > > QEMU, but the reality is that we have a few separate ones. As long as we
> > > consistently add a prefix that identifies the namespace in question, I
> > > think that would work.
> > 
> > > This means that if we're using node-name to identify the NBD connection,
> > > the namespace should be 'block' rather than 'nbd'.
> > > 
> > > One more thing to consider is, what if a single object has multiple
> > > connections? In the case of node-names, we have a limited set of allowed
> > > characters, so we can use one of the remaining characters as a separator
> > > and then suffix a counter. In other places, the identifier isn't
> > > restricted, so suffixing doesn't work. Maybe prefixing does, but it
> > > would have to be there from the beginning then.
> > 
> > Yeh I worry about whether on nbd if you can have multiple nbd
> > connections to one block device.
> 
> The kernel NBD driver now supports multiple parallel connections.
> QEMU hasn't implemented this in its NBD code yet, but I certainly
> see that being in scope for future.

It's not parallel for performance that worries me, it's more about
separateq connections for separate uses - e.g. if we're serving the same
read-only disk to multiple separate things.

Dave
 
> 
> Regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 13:48                       ` Dr. David Alan Gilbert
@ 2020-05-13 13:57                         ` Eric Blake
  2020-05-13 14:06                         ` Kevin Wolf
  1 sibling, 0 replies; 52+ messages in thread
From: Eric Blake @ 2020-05-13 13:57 UTC (permalink / raw)
  To: Dr. David Alan Gilbert, Daniel P. Berrangé
  Cc: Kevin Wolf, Lukas Straub, qemu-block, Juan Quintela, qemu-devel,
	Max Reitz, Paolo Bonzini, Marc-André Lureau

On 5/13/20 8:48 AM, Dr. David Alan Gilbert wrote:

>>> Yeh I worry about whether on nbd if you can have multiple nbd
>>> connections to one block device.
>>
>> The kernel NBD driver now supports multiple parallel connections.
>> QEMU hasn't implemented this in its NBD code yet, but I certainly
>> see that being in scope for future.
> 
> It's not parallel for performance that worries me, it's more about
> separateq connections for separate uses - e.g. if we're serving the same
> read-only disk to multiple separate things.

qemu allows multiple simultaneous NBD clients, but does not promise 
cache consistency between them.  As long as all the clients are 
read-only, you can't tell the difference (and thus, for read-only 
connections, qemu even advertises NBD_FLAG_CAN_MULTI_CONN per the NBD 
spec).  See also 'qemu-nbd -e' for controlling how many clients qemu-nbd 
will permit at once (when qemu proper is serving NBD, we don't currently 
expose a knob over QMP to control client count, but instead just blindly 
allow multiple connections).  But as soon as one of the clients wants to 
write, that is where we would need to improve code before making it 
safe, so there, we do not advertise MULTI_CONN support.

As for cases with multiple simultaneous clients: we already have those. 
In the case of incremental backups with an NBD export, my demo code for 
copying out the incremental backup involved two parallel NBD clients - 
one using 'qemu-img map' to probe which portions of the image were 
dirty, and a second client actually performing the reads.  But 
incremental backups are read-only situations (the clients are not 
modifying qemu state).

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 13:48                       ` Dr. David Alan Gilbert
  2020-05-13 13:57                         ` Eric Blake
@ 2020-05-13 14:06                         ` Kevin Wolf
  2020-05-13 14:18                           ` Eric Blake
  1 sibling, 1 reply; 52+ messages in thread
From: Kevin Wolf @ 2020-05-13 14:06 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Lukas Straub, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini

Am 13.05.2020 um 15:48 hat Dr. David Alan Gilbert geschrieben:
> * Daniel P. BerrangÃ© (berrange@redhat.com) wrote:
> > On Wed, May 13, 2020 at 01:56:24PM +0100, Dr. David Alan Gilbert wrote:
> > > * Kevin Wolf (kwolf@redhat.com) wrote:
> > > > I guess it would be nice to have a single namespace for everything in
> > > > QEMU, but the reality is that we have a few separate ones. As long as we
> > > > consistently add a prefix that identifies the namespace in question, I
> > > > think that would work.
> > > 
> > > > This means that if we're using node-name to identify the NBD connection,
> > > > the namespace should be 'block' rather than 'nbd'.
> > > > 
> > > > One more thing to consider is, what if a single object has multiple
> > > > connections? In the case of node-names, we have a limited set of allowed
> > > > characters, so we can use one of the remaining characters as a separator
> > > > and then suffix a counter. In other places, the identifier isn't
> > > > restricted, so suffixing doesn't work. Maybe prefixing does, but it
> > > > would have to be there from the beginning then.
> > > 
> > > Yeh I worry about whether on nbd if you can have multiple nbd
> > > connections to one block device.
> > 
> > The kernel NBD driver now supports multiple parallel connections.
> > QEMU hasn't implemented this in its NBD code yet, but I certainly
> > see that being in scope for future.
> 
> It's not parallel for performance that worries me, it's more about
> separateq connections for separate uses - e.g. if we're serving the same
> read-only disk to multiple separate things.

That would be a concern for the NBD server. I'm not sure if anything in
QEMU ever waits for NBD servers (except for the client on the other side
of the connection, of course), so there might be no use case for yanking
their connections.

Anyway, here we were talking about the NBD client, which always accesses
one disk. If you access a second disk, you have a second NBD block node.

Kevin



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 14:06                         ` Kevin Wolf
@ 2020-05-13 14:18                           ` Eric Blake
  0 siblings, 0 replies; 52+ messages in thread
From: Eric Blake @ 2020-05-13 14:18 UTC (permalink / raw)
  To: Kevin Wolf, Dr. David Alan Gilbert
  Cc: Lukas Straub, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz, Paolo Bonzini,
	Marc-André Lureau

On 5/13/20 9:06 AM, Kevin Wolf wrote:

>>>>> One more thing to consider is, what if a single object has multiple
>>>>> connections? In the case of node-names, we have a limited set of allowed
>>>>> characters, so we can use one of the remaining characters as a separator
>>>>> and then suffix a counter. In other places, the identifier isn't
>>>>> restricted, so suffixing doesn't work. Maybe prefixing does, but it
>>>>> would have to be there from the beginning then.
>>>>
>>>> Yeh I worry about whether on nbd if you can have multiple nbd
>>>> connections to one block device.
>>>
>>> The kernel NBD driver now supports multiple parallel connections.
>>> QEMU hasn't implemented this in its NBD code yet, but I certainly
>>> see that being in scope for future.
>>
>> It's not parallel for performance that worries me, it's more about
>> separateq connections for separate uses - e.g. if we're serving the same
>> read-only disk to multiple separate things.
> 
> That would be a concern for the NBD server. I'm not sure if anything in
> QEMU ever waits for NBD servers (except for the client on the other side
> of the connection, of course), so there might be no use case for yanking
> their connections.
> 
> Anyway, here we were talking about the NBD client, which always accesses
> one disk. If you access a second disk, you have a second NBD block node.

Ah, right, that's the other direction.  No, we do not currently support 
a single qemu block node backed by multiple NBD clients to one (or more, 
if they are serving identical content in a failover scenario) NBD 
servers.  Performance could indeed be potentially improved by doing 
that, but for now, every time qemu behaves as a new NBD client, it is 
via an additional block node.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-11 11:14 [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Lukas Straub
                   ` (5 preceding siblings ...)
  2020-05-11 11:49 ` [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Daniel P. Berrangé
@ 2020-05-13 19:12 ` Lukas Straub
  2020-05-14 10:41   ` Kevin Wolf
  2020-05-14 11:01   ` Dr. David Alan Gilbert
  6 siblings, 2 replies; 52+ messages in thread
From: Lukas Straub @ 2020-05-13 19:12 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, Dr. David Alan Gilbert, Max Reitz,
	Paolo Bonzini, Marc-André Lureau

[-- Attachment #1: Type: text/plain, Size: 1583 bytes --]

Terminology:
instance = one (nbd) blockdev/one chardev/the single migrationstate
connection = one TCP connection

Hello Everyone,
Having read all the comments, here is proposal v2:
Every instance registers itself with a unique name in the form "blockdev:<node-name>", "chardev:<chardev-name>" and "migration" using yank_register_instance which will do some sanity checks like checking if the same name exists already. Then (multiple) yank functions can be registered as needed with that single name. When the instance exits/is removed, it unregisters all yank functions and unregisters it's name with yank_unregister_instance which will check if all yank functions where unregistered.
Every instance that supports the yank feature will register itself and the yank functions unconditionally (No extra 'yank' option per instance).
The 'query-yank' oob qmp command lists the names of all registered instances.
The 'yank' oob qmp command takes a list of names and for every name calls all yank functions registered with that name. Before doing anything, it will check that all names exist.

If the instance has multiple connections (say, migration with multifd), i don't think it makes much sense to just shutdown one connection. Calling 'yank' on a instance will always shutdown all connections of that instance.

Yank functions are generic and in no way limited to connections. Say, if migration is started to an 'exec:' address, migration could register a yank function to kill that external command on yank (Won't be implemented yet though).

Regards,
Lukas Straub

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 19:12 ` Lukas Straub
@ 2020-05-14 10:41   ` Kevin Wolf
  2020-05-14 11:01   ` Dr. David Alan Gilbert
  1 sibling, 0 replies; 52+ messages in thread
From: Kevin Wolf @ 2020-05-14 10:41 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz, Paolo Bonzini,
	Marc-André Lureau, Dr. David Alan Gilbert

[-- Attachment #1: Type: text/plain, Size: 3123 bytes --]

Am 13.05.2020 um 21:12 hat Lukas Straub geschrieben:
> Terminology:
> instance = one (nbd) blockdev/one chardev/the single migrationstate
> connection = one TCP connection
> 
> Hello Everyone,
> Having read all the comments, here is proposal v2:

Looks quite good to me.

> Every instance registers itself with a unique name in the form
> "blockdev:<node-name>", "chardev:<chardev-name>" and "migration" using
> yank_register_instance which will do some sanity checks like checking
> if the same name exists already. Then (multiple) yank functions can be
> registered as needed with that single name. When the instance exits/is
> removed, it unregisters all yank functions and unregisters it's name
> with yank_unregister_instance which will check if all yank functions
> where unregistered.

Feels like nitpicking, but you say that functions are registered with a
name that you have previously registered. If you mean literally passing
a string, could this lead to callers forgetting to register it first?

Maybe yank_register_instance() should return something like a
YankInstance object that must be passed to yank_register_function().
Then you would probably also have a list of all functions registered for
a single instance so that yank_unregister_instance() could automatically
remove all of them instead of requiring the instance to do so.

> Every instance that supports the yank feature will register itself and
> the yank functions unconditionally (No extra 'yank' option per
> instance).
> The 'query-yank' oob qmp command lists the names of all registered
> instances.
> The 'yank' oob qmp command takes a list of names and for every name
> calls all yank functions registered with that name. Before doing
> anything, it will check that all names exist.
> 
> If the instance has multiple connections (say, migration with
> multifd), i don't think it makes much sense to just shutdown one
> connection. Calling 'yank' on a instance will always shutdown all
> connections of that instance.

I think it depends. If shutting down one connection basically kills the
functionality of the whole instance, there is no reason not to kill all
connections. But if the instance could continue working on the second
connection, maybe it shouldn't be killed.

Anyway, we can extend things as needed later. I already mentioned
elsewhere in this thread that block node-names have a restricted set of
allowed character, so having a suffix to distinguish multiple yankable
things is possible. I just checked chardev and it also restricts the
allowed set of characters, so the same applies. 'migration' is only a
fixed string, so it's trivially extensible.

So we know a path forward if we ever need to yank individual
connections, which is good enough for me.

> Yank functions are generic and in no way limited to connections. Say,
> if migration is started to an 'exec:' address, migration could
> register a yank function to kill that external command on yank (Won't
> be implemented yet though).

Yes, this makes sense as a potential future use case.

Kevin

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 19:12 ` Lukas Straub
  2020-05-14 10:41   ` Kevin Wolf
@ 2020-05-14 11:01   ` Dr. David Alan Gilbert
  1 sibling, 0 replies; 52+ messages in thread
From: Dr. David Alan Gilbert @ 2020-05-14 11:01 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Max Reitz, Paolo Bonzini,
	Marc-André Lureau

* Lukas Straub (lukasstraub2@web.de) wrote:
> Terminology:
> instance = one (nbd) blockdev/one chardev/the single migrationstate
> connection = one TCP connection
> 
> Hello Everyone,
> Having read all the comments, here is proposal v2:
> Every instance registers itself with a unique name in the form "blockdev:<node-name>", "chardev:<chardev-name>" and "migration" using yank_register_instance which will do some sanity checks like checking if the same name exists already. Then (multiple) yank functions can be registered as needed with that single name. When the instance exits/is removed, it unregisters all yank functions and unregisters it's name with yank_unregister_instance which will check if all yank functions where unregistered.
> Every instance that supports the yank feature will register itself and the yank functions unconditionally (No extra 'yank' option per instance).
> The 'query-yank' oob qmp command lists the names of all registered instances.
> The 'yank' oob qmp command takes a list of names and for every name calls all yank functions registered with that name. Before doing anything, it will check that all names exist.
> 
> If the instance has multiple connections (say, migration with multifd), i don't think it makes much sense to just shutdown one connection. Calling 'yank' on a instance will always shutdown all connections of that instance.

Agreed.

> Yank functions are generic and in no way limited to connections. Say, if migration is started to an 'exec:' address, migration could register a yank function to kill that external command on yank (Won't be implemented yet though).

One thing we need to be care of is that the yank functions stay suitable
for OOB calling.

Dave

> Regards,
> Lukas Straub


--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 3/5] block/nbd.c: Add yank feature
  2020-05-12  8:54       ` Daniel P. Berrangé
@ 2020-05-15  9:48         ` Lukas Straub
  2020-05-15 10:04           ` Daniel P. Berrangé
  0 siblings, 1 reply; 52+ messages in thread
From: Lukas Straub @ 2020-05-15  9:48 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini, Dr. David Alan Gilbert

[-- Attachment #1: Type: text/plain, Size: 2286 bytes --]

On Tue, 12 May 2020 09:54:58 +0100
Daniel P. Berrangé <berrange@redhat.com> wrote:

> On Mon, May 11, 2020 at 07:05:24PM +0200, Lukas Straub wrote:
> > On Mon, 11 May 2020 17:19:09 +0100
> > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> >   
> > > * Lukas Straub (lukasstraub2@web.de) wrote:  
> > > > Add yank option, pass it to the socket-channel and register a yank
> > > > function which sets s->state = NBD_CLIENT_QUIT. This is the same
> > > > behaviour as if an error occured.
> > > > 
> > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>    
> > >   
> > > > +static void nbd_yank(void *opaque)
> > > > +{
> > > > +    BlockDriverState *bs = opaque;
> > > > +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > > > +
> > > > +    atomic_set(&s->state, NBD_CLIENT_QUIT);    
> > > 
> > > I think I was expecting a shutdown on the socket here - why doesn't it
> > > have one?  
> > 
> > For nbd, we register two yank functions: This one and we enable
> > the yank feature on the qio channel (see function
> > nbd_establish_connection below).  
> 
> As mentioned on the earlier patch, I don't want to see any yank
> code in the QIOChannel object directly. This nbd_yank function
> can simply call the qio_channel_shutdown() function directly
> and avoid need for modifying the QIOChannel object with yank
> support.

Hi,
Looking at it again, the problem is not with registering the yank functions, but with tracking the lifetime of it. Suppose we add qio_channel_shutdown to the yank_nbd function. Then we need to unregister it whenever the QIOChannel object is freed.

In the code that would lead to the following constructs in a lot of places:
     if (local_err) {
         yank_unregister_function(s->yank_name, yank_nbd, bs);
         object_unref(OBJECT(sioc));
         error_propagate(errp, local_err);
         return NULL;
     }

If you don't want the code in QIOChannel I guess I can create a subclass (YankableChannelSocket?) of QIOChannelSocket. What do you think?

Regards,
Lukas Straub

> This will make the NBD code clearer too, as we can see exactly
> what actions are performed at NBD layer when a yank happens,
> which is what David was not seeing clearly here.
> 
> Regards,
> Daniel


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 3/5] block/nbd.c: Add yank feature
  2020-05-15  9:48         ` Lukas Straub
@ 2020-05-15 10:04           ` Daniel P. Berrangé
  2020-05-15 10:14             ` Lukas Straub
  0 siblings, 1 reply; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-15 10:04 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Paolo Bonzini, Marc-André Lureau, Dr. David Alan Gilbert

On Fri, May 15, 2020 at 11:48:18AM +0200, Lukas Straub wrote:
> On Tue, 12 May 2020 09:54:58 +0100
> Daniel P. Berrangé <berrange@redhat.com> wrote:
> 
> > On Mon, May 11, 2020 at 07:05:24PM +0200, Lukas Straub wrote:
> > > On Mon, 11 May 2020 17:19:09 +0100
> > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > >   
> > > > * Lukas Straub (lukasstraub2@web.de) wrote:  
> > > > > Add yank option, pass it to the socket-channel and register a yank
> > > > > function which sets s->state = NBD_CLIENT_QUIT. This is the same
> > > > > behaviour as if an error occured.
> > > > > 
> > > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>    
> > > >   
> > > > > +static void nbd_yank(void *opaque)
> > > > > +{
> > > > > +    BlockDriverState *bs = opaque;
> > > > > +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > > > > +
> > > > > +    atomic_set(&s->state, NBD_CLIENT_QUIT);    
> > > > 
> > > > I think I was expecting a shutdown on the socket here - why doesn't it
> > > > have one?  
> > > 
> > > For nbd, we register two yank functions: This one and we enable
> > > the yank feature on the qio channel (see function
> > > nbd_establish_connection below).  
> > 
> > As mentioned on the earlier patch, I don't want to see any yank
> > code in the QIOChannel object directly. This nbd_yank function
> > can simply call the qio_channel_shutdown() function directly
> > and avoid need for modifying the QIOChannel object with yank
> > support.
> 
> Hi,
> Looking at it again, the problem is not with registering the yank functions, but with tracking the lifetime of it. Suppose we add qio_channel_shutdown to the yank_nbd function. Then we need to unregister it whenever the QIOChannel object is freed.
> 
> In the code that would lead to the following constructs in a lot of places:
>      if (local_err) {
>          yank_unregister_function(s->yank_name, yank_nbd, bs);
>          object_unref(OBJECT(sioc));
>          error_propagate(errp, local_err);
>          return NULL;
>      }

The nbd patch here already has a yank_unregister_function() so I'm
not seeing anything changes in that respect. The "yank_nbd" function
should check that the I/O channel is non-NULL before calling the
qio_channel_shutdown method.

> If you don't want the code in QIOChannel I guess I can create a
> subclass (YankableChannelSocket?) of QIOChannelSocket. What do
> you think?

That's no better, and I don't see any compelling need for it as calling
yank_unregister_function is something nbd already does in its nbd_close
function. It isn't a burden for the other backends to do similarly.



Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 3/5] block/nbd.c: Add yank feature
  2020-05-15 10:04           ` Daniel P. Berrangé
@ 2020-05-15 10:14             ` Lukas Straub
  2020-05-15 10:26               ` Daniel P. Berrangé
  0 siblings, 1 reply; 52+ messages in thread
From: Lukas Straub @ 2020-05-15 10:14 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Paolo Bonzini, Marc-André Lureau, Dr. David Alan Gilbert

[-- Attachment #1: Type: text/plain, Size: 3012 bytes --]

On Fri, 15 May 2020 11:04:13 +0100
Daniel P. Berrangé <berrange@redhat.com> wrote:

> On Fri, May 15, 2020 at 11:48:18AM +0200, Lukas Straub wrote:
> > On Tue, 12 May 2020 09:54:58 +0100
> > Daniel P. Berrangé <berrange@redhat.com> wrote:
> >   
> > > On Mon, May 11, 2020 at 07:05:24PM +0200, Lukas Straub wrote:  
> > > > On Mon, 11 May 2020 17:19:09 +0100
> > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > >     
> > > > > * Lukas Straub (lukasstraub2@web.de) wrote:    
> > > > > > Add yank option, pass it to the socket-channel and register a yank
> > > > > > function which sets s->state = NBD_CLIENT_QUIT. This is the same
> > > > > > behaviour as if an error occured.
> > > > > > 
> > > > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>      
> > > > >     
> > > > > > +static void nbd_yank(void *opaque)
> > > > > > +{
> > > > > > +    BlockDriverState *bs = opaque;
> > > > > > +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > > > > > +
> > > > > > +    atomic_set(&s->state, NBD_CLIENT_QUIT);      
> > > > > 
> > > > > I think I was expecting a shutdown on the socket here - why doesn't it
> > > > > have one?    
> > > > 
> > > > For nbd, we register two yank functions: This one and we enable
> > > > the yank feature on the qio channel (see function
> > > > nbd_establish_connection below).    
> > > 
> > > As mentioned on the earlier patch, I don't want to see any yank
> > > code in the QIOChannel object directly. This nbd_yank function
> > > can simply call the qio_channel_shutdown() function directly
> > > and avoid need for modifying the QIOChannel object with yank
> > > support.  
> > 
> > Hi,
> > Looking at it again, the problem is not with registering the yank functions, but with tracking the lifetime of it. Suppose we add qio_channel_shutdown to the yank_nbd function. Then we need to unregister it whenever the QIOChannel object is freed.
> > 
> > In the code that would lead to the following constructs in a lot of places:
> >      if (local_err) {
> >          yank_unregister_function(s->yank_name, yank_nbd, bs);
> >          object_unref(OBJECT(sioc));
> >          error_propagate(errp, local_err);
> >          return NULL;
> >      }  
> 
> The nbd patch here already has a yank_unregister_function() so I'm
> not seeing anything changes in that respect. The "yank_nbd" function
> should check that the I/O channel is non-NULL before calling the
> qio_channel_shutdown method.

Hmm, but if object_unref frees the object, it doesn't set the pointer to NULL does it?

> > If you don't want the code in QIOChannel I guess I can create a
> > subclass (YankableChannelSocket?) of QIOChannelSocket. What do
> > you think?  
> 
> That's no better, and I don't see any compelling need for it as calling
> yank_unregister_function is something nbd already does in its nbd_close
> function. It isn't a burden for the other backends to do similarly.
> 
> 
> 
> Regards,
> Daniel


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 3/5] block/nbd.c: Add yank feature
  2020-05-15 10:14             ` Lukas Straub
@ 2020-05-15 10:26               ` Daniel P. Berrangé
  2020-05-15 13:03                 ` Lukas Straub
  0 siblings, 1 reply; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-15 10:26 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini, Dr. David Alan Gilbert

On Fri, May 15, 2020 at 12:14:47PM +0200, Lukas Straub wrote:
> On Fri, 15 May 2020 11:04:13 +0100
> Daniel P. Berrangé <berrange@redhat.com> wrote:
> 
> > On Fri, May 15, 2020 at 11:48:18AM +0200, Lukas Straub wrote:
> > > On Tue, 12 May 2020 09:54:58 +0100
> > > Daniel P. Berrangé <berrange@redhat.com> wrote:
> > >   
> > > > On Mon, May 11, 2020 at 07:05:24PM +0200, Lukas Straub wrote:  
> > > > > On Mon, 11 May 2020 17:19:09 +0100
> > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > >     
> > > > > > * Lukas Straub (lukasstraub2@web.de) wrote:    
> > > > > > > Add yank option, pass it to the socket-channel and register a yank
> > > > > > > function which sets s->state = NBD_CLIENT_QUIT. This is the same
> > > > > > > behaviour as if an error occured.
> > > > > > > 
> > > > > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>      
> > > > > >     
> > > > > > > +static void nbd_yank(void *opaque)
> > > > > > > +{
> > > > > > > +    BlockDriverState *bs = opaque;
> > > > > > > +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > > > > > > +
> > > > > > > +    atomic_set(&s->state, NBD_CLIENT_QUIT);      
> > > > > > 
> > > > > > I think I was expecting a shutdown on the socket here - why doesn't it
> > > > > > have one?    
> > > > > 
> > > > > For nbd, we register two yank functions: This one and we enable
> > > > > the yank feature on the qio channel (see function
> > > > > nbd_establish_connection below).    
> > > > 
> > > > As mentioned on the earlier patch, I don't want to see any yank
> > > > code in the QIOChannel object directly. This nbd_yank function
> > > > can simply call the qio_channel_shutdown() function directly
> > > > and avoid need for modifying the QIOChannel object with yank
> > > > support.  
> > > 
> > > Hi,
> > > Looking at it again, the problem is not with registering the yank functions, but with tracking the lifetime of it. Suppose we add qio_channel_shutdown to the yank_nbd function. Then we need to unregister it whenever the QIOChannel object is freed.
> > > 
> > > In the code that would lead to the following constructs in a lot of places:
> > >      if (local_err) {
> > >          yank_unregister_function(s->yank_name, yank_nbd, bs);
> > >          object_unref(OBJECT(sioc));
> > >          error_propagate(errp, local_err);
> > >          return NULL;
> > >      }  
> > 
> > The nbd patch here already has a yank_unregister_function() so I'm
> > not seeing anything changes in that respect. The "yank_nbd" function
> > should check that the I/O channel is non-NULL before calling the
> > qio_channel_shutdown method.
> 
> Hmm, but if object_unref frees the object, it doesn't set the
> pointer to NULL does it?

So set  "ioc = NULL" after calling object_unref. AFAICT, nbd already
does exactly this.


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 3/5] block/nbd.c: Add yank feature
  2020-05-15 10:26               ` Daniel P. Berrangé
@ 2020-05-15 13:03                 ` Lukas Straub
  2020-05-15 13:45                   ` Daniel P. Berrangé
  0 siblings, 1 reply; 52+ messages in thread
From: Lukas Straub @ 2020-05-15 13:03 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Marc-André Lureau, Paolo Bonzini, Dr. David Alan Gilbert

[-- Attachment #1: Type: text/plain, Size: 3660 bytes --]

On Fri, 15 May 2020 11:26:13 +0100
Daniel P. Berrangé <berrange@redhat.com> wrote:

> On Fri, May 15, 2020 at 12:14:47PM +0200, Lukas Straub wrote:
> > On Fri, 15 May 2020 11:04:13 +0100
> > Daniel P. Berrangé <berrange@redhat.com> wrote:
> >   
> > > On Fri, May 15, 2020 at 11:48:18AM +0200, Lukas Straub wrote:  
> > > > On Tue, 12 May 2020 09:54:58 +0100
> > > > Daniel P. Berrangé <berrange@redhat.com> wrote:
> > > >     
> > > > > On Mon, May 11, 2020 at 07:05:24PM +0200, Lukas Straub wrote:    
> > > > > > On Mon, 11 May 2020 17:19:09 +0100
> > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > > >       
> > > > > > > * Lukas Straub (lukasstraub2@web.de) wrote:      
> > > > > > > > Add yank option, pass it to the socket-channel and register a yank
> > > > > > > > function which sets s->state = NBD_CLIENT_QUIT. This is the same
> > > > > > > > behaviour as if an error occured.
> > > > > > > > 
> > > > > > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>        
> > > > > > >       
> > > > > > > > +static void nbd_yank(void *opaque)
> > > > > > > > +{
> > > > > > > > +    BlockDriverState *bs = opaque;
> > > > > > > > +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > > > > > > > +
> > > > > > > > +    atomic_set(&s->state, NBD_CLIENT_QUIT);        
> > > > > > > 
> > > > > > > I think I was expecting a shutdown on the socket here - why doesn't it
> > > > > > > have one?      
> > > > > > 
> > > > > > For nbd, we register two yank functions: This one and we enable
> > > > > > the yank feature on the qio channel (see function
> > > > > > nbd_establish_connection below).      
> > > > > 
> > > > > As mentioned on the earlier patch, I don't want to see any yank
> > > > > code in the QIOChannel object directly. This nbd_yank function
> > > > > can simply call the qio_channel_shutdown() function directly
> > > > > and avoid need for modifying the QIOChannel object with yank
> > > > > support.    
> > > > 
> > > > Hi,
> > > > Looking at it again, the problem is not with registering the yank functions, but with tracking the lifetime of it. Suppose we add qio_channel_shutdown to the yank_nbd function. Then we need to unregister it whenever the QIOChannel object is freed.
> > > > 
> > > > In the code that would lead to the following constructs in a lot of places:
> > > >      if (local_err) {
> > > >          yank_unregister_function(s->yank_name, yank_nbd, bs);
> > > >          object_unref(OBJECT(sioc));
> > > >          error_propagate(errp, local_err);
> > > >          return NULL;
> > > >      }    
> > > 
> > > The nbd patch here already has a yank_unregister_function() so I'm
> > > not seeing anything changes in that respect. The "yank_nbd" function
> > > should check that the I/O channel is non-NULL before calling the
> > > qio_channel_shutdown method.  
> > 
> > Hmm, but if object_unref frees the object, it doesn't set the
> > pointer to NULL does it?  
> 
> So set  "ioc = NULL" after calling object_unref. AFAICT, nbd already
> does exactly this.

I see 3 options to do that in a thread-safe manner:
1. Introduce a mutex here.
2. Use atomics to check for NULL and increase the reference count at the same time in yank_nbd so it isn't freed while calling qio_channel_shutdown on it. (I'm unsure how to do that)
3. Do it like it is currently done (but with the new subclass). We get thread safety for free trough the mutex in yank.c.

I prefer option 3 :)

The subclass can live in yank.c. There'd have to be 2 yank functions again but a comment in yank_nbd should suffice.

Regards,
Lukas Straub

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 3/5] block/nbd.c: Add yank feature
  2020-05-15 13:03                 ` Lukas Straub
@ 2020-05-15 13:45                   ` Daniel P. Berrangé
  0 siblings, 0 replies; 52+ messages in thread
From: Daniel P. Berrangé @ 2020-05-15 13:45 UTC (permalink / raw)
  To: Lukas Straub
  Cc: Kevin Wolf, qemu-block, Juan Quintela, qemu-devel, Max Reitz,
	Paolo Bonzini, Marc-André Lureau, Dr. David Alan Gilbert

On Fri, May 15, 2020 at 03:03:30PM +0200, Lukas Straub wrote:
> On Fri, 15 May 2020 11:26:13 +0100
> Daniel P. Berrangé <berrange@redhat.com> wrote:
> 
> > On Fri, May 15, 2020 at 12:14:47PM +0200, Lukas Straub wrote:
> > > On Fri, 15 May 2020 11:04:13 +0100
> > > Daniel P. Berrangé <berrange@redhat.com> wrote:
> > >   
> > > > On Fri, May 15, 2020 at 11:48:18AM +0200, Lukas Straub wrote:  
> > > > > On Tue, 12 May 2020 09:54:58 +0100
> > > > > Daniel P. Berrangé <berrange@redhat.com> wrote:
> > > > >     
> > > > > > On Mon, May 11, 2020 at 07:05:24PM +0200, Lukas Straub wrote:    
> > > > > > > On Mon, 11 May 2020 17:19:09 +0100
> > > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > > > >       
> > > > > > > > * Lukas Straub (lukasstraub2@web.de) wrote:      
> > > > > > > > > Add yank option, pass it to the socket-channel and register a yank
> > > > > > > > > function which sets s->state = NBD_CLIENT_QUIT. This is the same
> > > > > > > > > behaviour as if an error occured.
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>        
> > > > > > > >       
> > > > > > > > > +static void nbd_yank(void *opaque)
> > > > > > > > > +{
> > > > > > > > > +    BlockDriverState *bs = opaque;
> > > > > > > > > +    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
> > > > > > > > > +
> > > > > > > > > +    atomic_set(&s->state, NBD_CLIENT_QUIT);        
> > > > > > > > 
> > > > > > > > I think I was expecting a shutdown on the socket here - why doesn't it
> > > > > > > > have one?      
> > > > > > > 
> > > > > > > For nbd, we register two yank functions: This one and we enable
> > > > > > > the yank feature on the qio channel (see function
> > > > > > > nbd_establish_connection below).      
> > > > > > 
> > > > > > As mentioned on the earlier patch, I don't want to see any yank
> > > > > > code in the QIOChannel object directly. This nbd_yank function
> > > > > > can simply call the qio_channel_shutdown() function directly
> > > > > > and avoid need for modifying the QIOChannel object with yank
> > > > > > support.    
> > > > > 
> > > > > Hi,
> > > > > Looking at it again, the problem is not with registering the yank functions, but with tracking the lifetime of it. Suppose we add qio_channel_shutdown to the yank_nbd function. Then we need to unregister it whenever the QIOChannel object is freed.
> > > > > 
> > > > > In the code that would lead to the following constructs in a lot of places:
> > > > >      if (local_err) {
> > > > >          yank_unregister_function(s->yank_name, yank_nbd, bs);
> > > > >          object_unref(OBJECT(sioc));
> > > > >          error_propagate(errp, local_err);
> > > > >          return NULL;
> > > > >      }    
> > > > 
> > > > The nbd patch here already has a yank_unregister_function() so I'm
> > > > not seeing anything changes in that respect. The "yank_nbd" function
> > > > should check that the I/O channel is non-NULL before calling the
> > > > qio_channel_shutdown method.  
> > > 
> > > Hmm, but if object_unref frees the object, it doesn't set the
> > > pointer to NULL does it?  
> > 
> > So set  "ioc = NULL" after calling object_unref. AFAICT, nbd already
> > does exactly this.
> 
> I see 3 options to do that in a thread-safe manner:
> 1. Introduce a mutex here.
> 2. Use atomics to check for NULL and increase the reference count at the same time in yank_nbd so it isn't freed while calling qio_channel_shutdown on it. (I'm unsure how to do that)
> 3. Do it like it is currently done (but with the new subclass). We get thread safety for free trough the mutex in yank.c.

Oh, so the problem is that  the yank function can be invoked concurrently
with the object being unreffed.

In normal object finalizers, we just have to order things such that
yank_unregister_function() is called before object_unref(ioc) is
called.

The NBD code is slightly harder because we can close & re-open the
IO separately from the finalizer. eg in nbd_reconnect_attempt we
have

    if (s->ioc) {
        nbd_client_detach_aio_context(s->bs);
        object_unref(OBJECT(s->sioc));
        s->sioc = NULL;
        object_unref(OBJECT(s->ioc));
        s->ioc = NULL;
    }

    s->connect_status = nbd_client_connect(s->bs, &local_err);

If the io channel is not open, then we don't need a yank function
registered. So this code would changed to


    if (s->ioc) {
        nbd_client_detach_aio_context(s->bs);
	yank_unregister_function(...);
        object_unref(OBJECT(s->sioc));
        s->sioc = NULL;
        object_unref(OBJECT(s->ioc));
        s->ioc = NULL;
    }

    s->connect_status = nbd_client_connect(s->bs, &local_err);

The locking in yank_unregister_function() should ensure that any
currently executing yank callback has completed before the
yank_unregister_function() call returns. Thus it should be safe
to unref the cannel.

nbd_client_connect() will call yank_register_function() once it
has successfully started a new connection, which your patch already
handles IIUC.

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu
  2020-05-13 11:13                 ` Kevin Wolf
  2020-05-13 11:26                   ` Daniel P. Berrangé
  2020-05-13 12:56                   ` Dr. David Alan Gilbert
@ 2020-09-01 10:35                   ` Markus Armbruster
  2 siblings, 0 replies; 52+ messages in thread
From: Markus Armbruster @ 2020-09-01 10:35 UTC (permalink / raw)
  To: Kevin Wolf
  Cc: Lukas Straub, Daniel P. Berrangé,
	qemu-block, Juan Quintela, qemu-devel, Dr. David Alan Gilbert,
	Paolo Bonzini, Marc-André Lureau, Max Reitz

Kevin Wolf <kwolf@redhat.com> writes:

> Am 13.05.2020 um 12:53 hat Dr. David Alan Gilbert geschrieben:
>> * Kevin Wolf (kwolf@redhat.com) wrote:
>> > Am 12.05.2020 um 11:43 hat Daniel P. BerrangÃƒÂ© geschrieben:
>> > > On Tue, May 12, 2020 at 11:32:06AM +0200, Lukas Straub wrote:
>> > > > On Mon, 11 May 2020 16:46:45 +0100
>> > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
>> > > > 
>> > > > > * Daniel P. BerrangÃƒÆ’Ã‚Â© (berrange@redhat.com) wrote: 
>> > > > > > ...
>> > > > > > That way if QEMU does get stuck, you can start by tearing down the
>> > > > > > least distruptive channel. eg try tearing down the migration connection
>> > > > > > first (which shouldn't negatively impact the guest), and only if that
>> > > > > > doesn't work then, move on to tear down the NBD connection (which risks
>> > > > > > data loss)  
>> > > > > 
>> > > > > I wonder if a different way would be to make all network connections
>> > > > > register with yank, but then make yank take a list of connections to
>> > > > > shutdown(2).
>> > > > 
>> > > > Good Idea. We could name the connections (/yank callbacks) in the
>> > > > form "nbd:<node-name>", "chardev:<chardev-name>" and "migration"
>> > > > (and add "netdev:...", etc. in the future). Then make yank take a
>> > > > list of connection names as you suggest and silently ignore connections
>> > > > that don't exist. And maybe even add a 'query-yank' oob command returning
>> > > > a list of registered connections so the management application can do
>> > > > pattern matching if it wants.
[...]
>> > > Yes, that would make the yank command much more flexible in how it can
>> > > be used.
>> > > 
>> > > As an alternative to using formatted strings like this, it could be
>> > > modelled more explicitly in QAPI
>> > > 
>> > >   { 'struct':  'YankChannels',
>> > >     'data': { 'chardev': [ 'string' ],
>> > >               'nbd': ['string'],
>> > > 	      'migration': bool } }
>> > > 
>> > > In this example, 'chardev' would accept a list of chardev IDs which
>> > > have it enabled, 'nbd' would accept a list of block node IDs which
>> > > have it enabled, and migration is a singleton on/off.
>> > 
>> > Of course, it also means that the yank code needs to know about every
>> > single object that supports the operation, whereas if you only have
>> > strings, the objects could keep registering their connection with a
>> > generic function like yank_register_function() in this version.
>> > 
>> > I'm not sure if the additional complexity is worth the benefits.
>> 
>> I tend to agree; although we do have to ensure we either use an existing
>> naming scheme (e.g. QOM object names?) or make sure we've got a well
>> defined list of prefixes.
>
> Not everything that has a network connection is a QOM object (in fact,
> neither migration nor chardev nor nbd are QOM objects).
>
> I guess it would be nice to have a single namespace for everything in
> QEMU, but the reality is that we have a few separate ones. As long as we
> consistently add a prefix that identifies the namespace in question, I
> think that would work.
>
> This means that if we're using node-name to identify the NBD connection,
> the namespace should be 'block' rather than 'nbd'.
>
> One more thing to consider is, what if a single object has multiple
> connections? In the case of node-names, we have a limited set of allowed
> characters, so we can use one of the remaining characters as a separator
> and then suffix a counter. In other places, the identifier isn't
> restricted, so suffixing doesn't work. Maybe prefixing does, but it
> would have to be there from the beginning then.

If I understand it correctly, the argument for encoding the structured
"what to yank" information into a string is ease of implementation.  The
yank core sees only strings, and doesn't care about their contents.
Code registering "yankables" can build strings however it likes, as long
as they're all distinct.  QMP clients need to understand how the strings
are built to be able to yank specific "yankables" (as opposed to blindly
yanking stuff until QEMU gets unstuck").

I disagree with this argument for a number of reasons.

1. Use of strings doesn't reduce complexity, it moves it.

   String:

   * QMP clients may need to parse the strings they get back from
     command query-yank intro structured data.

   * QMP clients may need to format structured data into a string for
     command yank.

   * How structured data is be parsed from / formatted to a string is
     part of the external interface, and needs to be documented, in
     addition to the data structure.

   * The strings need to be kept backward compatible.  We could
     inadvertently create problems like the one you described above,
     where a format like UNIQUE-PREFIX:ID with an unrestricted ID is not
     extensible.

   * Code providing "yankables" needs to somehow ensure their strings
     are distinct.

   Complex type:

   * The result of query-yank is already structured data, no need for
     QMP clients to parse.

   * The argument of yank is structured data, no need to for QMP clients
     to format it into a string first.

   * Documenting just the data structure suffices.

   * Backward compatibility of complex QMP types is a well-understood
     problem.

   * Defining the structure in the QAPI schema as union trivially
     ensures the union branches are distinct.

2. Even with structured yank arguments, the yank core doesn't *need* to
   understand the structure.

   The yank core manages a set of instances.  Each instance associates a
   key with a list of YankFuncAndParam.  This is a basically dictionary.
   All the yank core needs to do with the dictionary keys is looking
   them up.

   The proposed implementation uses a list of instances (which is just
   fine).  All the lookup needs from the keys is an "is equal" relation.

   Checking two QAPI objects for structural equality is admittedly not
   as simple as comparing strings.  It does not, however, require
   understanding their structure.  Two stupid solutions come to mind:

   * Hand-written compare that keeps specifics out of the yank core

     Whatever uses a variant of YankInstance gets to provide a
     comparison function of the variant members.

     Compare the common members (initially just @type, which is the
     discriminator).  Return false if any of them differs.

     Else both instances use the same variant.  Return the value of the
     variant comparison function.

   * Visitors exist

     Convert both YankInstances to QObject, compare with
     qobject_is_equal(), done.

3. In QMP, encoding structured data in strings is anathema :)

[...]



^ permalink raw reply	[flat|nested] 52+ messages in thread

end of thread, other threads:[~2020-09-01 10:37 UTC | newest]

Thread overview: 52+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-11 11:14 [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Lukas Straub
2020-05-11 11:14 ` [PATCH 1/5] Introduce yank feature Lukas Straub
2020-05-11 11:14 ` [PATCH 2/5] io/channel.c,io/channel-socket.c: Add " Lukas Straub
2020-05-11 11:51   ` Daniel P. Berrangé
2020-05-11 20:00     ` Lukas Straub
2020-05-12  8:52       ` Daniel P. Berrangé
2020-05-11 11:14 ` [PATCH 3/5] block/nbd.c: " Lukas Straub
2020-05-11 16:19   ` Dr. David Alan Gilbert
2020-05-11 17:05     ` Lukas Straub
2020-05-11 17:24       ` Dr. David Alan Gilbert
2020-05-12  8:54       ` Daniel P. Berrangé
2020-05-15  9:48         ` Lukas Straub
2020-05-15 10:04           ` Daniel P. Berrangé
2020-05-15 10:14             ` Lukas Straub
2020-05-15 10:26               ` Daniel P. Berrangé
2020-05-15 13:03                 ` Lukas Straub
2020-05-15 13:45                   ` Daniel P. Berrangé
2020-05-11 11:14 ` [PATCH 4/5] chardev/char-socket.c: " Lukas Straub
2020-05-12  8:56   ` Daniel P. Berrangé
2020-05-11 11:14 ` [PATCH 5/5] migration: " Lukas Straub
2020-05-11 11:49 ` [PATCH 0/5] Introduce 'yank' oob qmp command to recover from hanging qemu Daniel P. Berrangé
2020-05-11 12:07   ` Dr. David Alan Gilbert
2020-05-11 12:17     ` Daniel P. Berrangé
2020-05-11 15:46       ` Dr. David Alan Gilbert
2020-05-12  9:32         ` Lukas Straub
2020-05-12  9:43           ` Daniel P. Berrangé
2020-05-12 18:58             ` Dr. David Alan Gilbert
2020-05-13  8:41               ` Daniel P. Berrangé
2020-05-12 19:42             ` Lukas Straub
2020-05-13 10:32             ` Kevin Wolf
2020-05-13 10:53               ` Dr. David Alan Gilbert
2020-05-13 11:13                 ` Kevin Wolf
2020-05-13 11:26                   ` Daniel P. Berrangé
2020-05-13 11:58                     ` Paolo Bonzini
2020-05-13 12:25                       ` Dr. David Alan Gilbert
2020-05-13 12:32                       ` Kevin Wolf
2020-05-13 12:18                     ` Kevin Wolf
2020-05-13 12:56                   ` Dr. David Alan Gilbert
2020-05-13 13:08                     ` Daniel P. Berrangé
2020-05-13 13:48                       ` Dr. David Alan Gilbert
2020-05-13 13:57                         ` Eric Blake
2020-05-13 14:06                         ` Kevin Wolf
2020-05-13 14:18                           ` Eric Blake
2020-09-01 10:35                   ` Markus Armbruster
2020-05-11 19:41       ` Lukas Straub
2020-05-11 18:12   ` Lukas Straub
2020-05-12  9:03     ` Daniel P. Berrangé
2020-05-12  9:06       ` Dr. David Alan Gilbert
2020-05-12  9:09     ` Dr. David Alan Gilbert
2020-05-13 19:12 ` Lukas Straub
2020-05-14 10:41   ` Kevin Wolf
2020-05-14 11:01   ` Dr. David Alan Gilbert

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.