* [PATCH 1/2] io: Add support for MSG_PEEK for socket channel
@ 2022-11-19  9:36 manish.mishra
From: manish.mishra @ 2022-11-19  9:36 UTC (permalink / raw)
  To: qemu-devel
  Cc: berrange, peterx, prerna.saxena, quintela, dgilbert, lsoaresp,
	manish.mishra

MSG_PEEK peeks at incoming data on the channel: the data is treated as
unread and the next read will still return it. This support is currently
added only for the socket class. An extra parameter 'flags' is added to
the io_readv calls to pass extra read flags such as MSG_PEEK.
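
For illustration only (not part of this patch), a minimal caller sketch
using the declarations added below; the channel 'ioc' and the surrounding
error handling are assumed:

    /* Peek at the first 4 bytes on a socket channel without consuming them. */
    uint32_t magic;
    struct iovec iov = { .iov_base = &magic, .iov_len = sizeof(magic) };
    Error *local_err = NULL;

    if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
        ssize_t n = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL,
                                           QIO_CHANNEL_READ_FLAG_MSG_PEEK,
                                           &local_err);
        if (n == sizeof(magic)) {
            /* A later qio_channel_read() still returns these same bytes. */
        }
    }
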
---
 chardev/char-socket.c               |  4 +-
 include/io/channel.h                | 83 +++++++++++++++++++++++++++++
 io/channel-buffer.c                 |  1 +
 io/channel-command.c                |  1 +
 io/channel-file.c                   |  1 +
 io/channel-null.c                   |  1 +
 io/channel-socket.c                 | 16 +++++-
 io/channel-tls.c                    |  1 +
 io/channel-websock.c                |  1 +
 io/channel.c                        | 73 +++++++++++++++++++++++--
 migration/channel-block.c           |  1 +
 scsi/qemu-pr-helper.c               |  2 +-
 tests/qtest/tpm-emu.c               |  2 +-
 tests/unit/test-io-channel-socket.c |  1 +
 util/vhost-user-server.c            |  2 +-
 15 files changed, 179 insertions(+), 11 deletions(-)

diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 879564aa8a..5afce9a464 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len)
     if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
         ret = qio_channel_readv_full(s->ioc, &iov, 1,
                                      &msgfds, &msgfds_num,
-                                     NULL);
+                                     0, NULL);
     } else {
         ret = qio_channel_readv_full(s->ioc, &iov, 1,
                                      NULL, NULL,
-                                     NULL);
+                                     0, NULL);
     }
 
     if (msgfds_num) {
diff --git a/include/io/channel.h b/include/io/channel.h
index c680ee7480..cbcde4b88f 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
 
 #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
 
+#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1
+
 typedef enum QIOChannelFeature QIOChannelFeature;
 
 enum QIOChannelFeature {
@@ -41,6 +43,7 @@ enum QIOChannelFeature {
     QIO_CHANNEL_FEATURE_SHUTDOWN,
     QIO_CHANNEL_FEATURE_LISTEN,
     QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
+    QIO_CHANNEL_FEATURE_READ_MSG_PEEK,
 };
 
 
@@ -114,6 +117,7 @@ struct QIOChannelClass {
                         size_t niov,
                         int **fds,
                         size_t *nfds,
+                        int flags,
                         Error **errp);
     int (*io_close)(QIOChannel *ioc,
                     Error **errp);
@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc,
  * @niov: the length of the @iov array
  * @fds: pointer to an array that will received file handles
  * @nfds: pointer filled with number of elements in @fds on return
+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*)
  * @errp: pointer to a NULL-initialized error object
  *
  * Read data from the IO channel, storing it in the
@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
                                size_t niov,
                                int **fds,
                                size_t *nfds,
+                               int flags,
                                Error **errp);
 
 
@@ -300,6 +306,34 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
                               size_t niov,
                               Error **errp);
 
+/**
+ * qio_channel_readv_peek_all_eof:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to read data into
+ * @niov: the length of the @iov array
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Read data from the peek of IO channel without
+ * actually removing it from channel buffer, storing
+ * it in the memory regions referenced by @iov. Each
+ * element in the @iov will be fully populated with
+ * data before the next one is used. The @niov
+ * parameter specifies the total number of elements
+ * in @iov.
+ *
+ * The function will wait for all requested data
+ * to be read, yielding from the current coroutine
+ * if required.
+ *
+ * Returns: 1 if all bytes were read, 0 if end-of-file
+ *          occurs without data, or -1 on error
+ */
+int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
+                                   const struct iovec *iov,
+                                   size_t niov,
+                                   Error **errp);
+
+
 /**
  * qio_channel_readv_all:
  * @ioc: the channel object
@@ -328,6 +362,34 @@ int qio_channel_readv_all(QIOChannel *ioc,
                           Error **errp);
 
 
+/**
+ * qio_channel_readv_peek_all:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to read data into
+ * @niov: the length of the @iov array
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Read data from the peek of the IO channel without
+ * removing from channel buffer, storing it in the
+ * memory regions referenced by @iov. Each element
+ * in the @iov will be fully populated with data
+ * before the next one is used. The @niov parameter
+ * specifies the total number of elements in @iov.
+ *
+ * The function will wait for all requested data
+ * to be read, yielding from the current coroutine
+ * if required.
+ *
+ * If end-of-file occurs before all requested data
+ * has been read, an error will be reported.
+ *
+ * Returns: 0 if all bytes were read, or -1 on error
+ */
+int qio_channel_readv_peek_all(QIOChannel *ioc,
+                               const struct iovec *iov,
+                               size_t niov,
+                               Error **errp);
+
 /**
  * qio_channel_writev_all:
  * @ioc: the channel object
@@ -456,6 +518,27 @@ int qio_channel_read_all(QIOChannel *ioc,
                          size_t buflen,
                          Error **errp);
 
+/**
+ * qio_channel_read_peek_all:
+ * @ioc: the channel object
+ * @buf: the memory region to read data into
+ * @buflen: the number of bytes to read into @buf
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Reads @buflen bytes from the peek of channel into @buf without
+ * removing it from channel buffer, possibly blocking or (if the
+ * channel is non-blocking) yielding from the current coroutine
+ * multiple times until the entire content is read. If end-of-file
+ * occurs it will return an error rather than a short-read. Otherwise
+ * behaves as qio_channel_read().
+ *
+ * Returns: 0 if all bytes were read, or -1 on error
+ */
+int qio_channel_read_peek_all(QIOChannel *ioc,
+                              const char *buf,
+                              size_t buflen,
+                              Error **errp);
+
 /**
  * qio_channel_write_all:
  * @ioc: the channel object
diff --git a/io/channel-buffer.c b/io/channel-buffer.c
index bf52011be2..8096180f85 100644
--- a/io/channel-buffer.c
+++ b/io/channel-buffer.c
@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc,
                                         size_t niov,
                                         int **fds,
                                         size_t *nfds,
+                                        int flags,
                                         Error **errp)
 {
     QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
diff --git a/io/channel-command.c b/io/channel-command.c
index 74516252ba..e7edd091af 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc,
                                          size_t niov,
                                          int **fds,
                                          size_t *nfds,
+                                         int flags,
                                          Error **errp)
 {
     QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
diff --git a/io/channel-file.c b/io/channel-file.c
index b67687c2aa..d76663e6ae 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc,
                                       size_t niov,
                                       int **fds,
                                       size_t *nfds,
+                                      int flags,
                                       Error **errp)
 {
     QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
diff --git a/io/channel-null.c b/io/channel-null.c
index 75e3781507..4fafdb770d 100644
--- a/io/channel-null.c
+++ b/io/channel-null.c
@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc,
                        size_t niov,
                        int **fds G_GNUC_UNUSED,
                        size_t *nfds G_GNUC_UNUSED,
+                       int flags,
                        Error **errp)
 {
     QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc);
diff --git a/io/channel-socket.c b/io/channel-socket.c
index b76dca9cc1..a06b24766d 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
     }
 #endif /* WIN32 */
 
+    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
+
     trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd);
     return cioc;
 
@@ -496,6 +498,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
                                         size_t niov,
                                         int **fds,
                                         size_t *nfds,
+                                        int flags,
                                         Error **errp)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
@@ -517,6 +520,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
 
     }
 
+    if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
+        sflags |= MSG_PEEK;
+    }
+
  retry:
     ret = recvmsg(sioc->fd, &msg, sflags);
     if (ret < 0) {
@@ -624,11 +631,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
                                         size_t niov,
                                         int **fds,
                                         size_t *nfds,
+                                        int flags,
                                         Error **errp)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
     ssize_t done = 0;
     ssize_t i;
+    int sflags = 0;
+
+    if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
+        sflags |= MSG_PEEK;
+    }
 
     for (i = 0; i < niov; i++) {
         ssize_t ret;
@@ -636,7 +649,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
         ret = recv(sioc->fd,
                    iov[i].iov_base,
                    iov[i].iov_len,
-                   0);
+                   sflags);
         if (ret < 0) {
             if (errno == EAGAIN) {
                 if (done) {
@@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 }
 #endif /* WIN32 */
 
-
 #ifdef QEMU_MSG_ZEROCOPY
 static int qio_channel_socket_flush(QIOChannel *ioc,
                                     Error **errp)
diff --git a/io/channel-tls.c b/io/channel-tls.c
index 4ce890a538..c730cb8ec5 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc,
                                      size_t niov,
                                      int **fds,
                                      size_t *nfds,
+                                     int flags,
                                      Error **errp)
 {
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
diff --git a/io/channel-websock.c b/io/channel-websock.c
index fb4932ade7..a12acc27cf 100644
--- a/io/channel-websock.c
+++ b/io/channel-websock.c
@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc,
                                          size_t niov,
                                          int **fds,
                                          size_t *nfds,
+                                         int flags,
                                          Error **errp)
 {
     QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
diff --git a/io/channel.c b/io/channel.c
index 0640941ac5..23c8752918 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
                                size_t niov,
                                int **fds,
                                size_t *nfds,
+                               int flags,
                                Error **errp)
 {
     QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
         return -1;
     }
 
-    return klass->io_readv(ioc, iov, niov, fds, nfds, errp);
+    if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) &&
+        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+        error_setg_errno(errp, EINVAL,
+                         "Channel does not support peek read");
+        return -1;
+    }
+
+    return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp);
 }
 
 
@@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
     return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
 }
 
+int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
+                                   const struct iovec *iov,
+                                   size_t niov,
+                                   Error **errp)
+{
+   ssize_t len = 0;
+   ssize_t total = iov_size(iov, niov);
+
+   while (len < total) {
+       len = qio_channel_readv_full(ioc, iov, niov, NULL,
+                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
+
+       if (len == QIO_CHANNEL_ERR_BLOCK) {
+            if (qemu_in_coroutine()) {
+                qio_channel_yield(ioc, G_IO_IN);
+            } else {
+                qio_channel_wait(ioc, G_IO_IN);
+            }
+            continue;
+       }
+       if (len == 0) {
+           return 0;
+       }
+       if (len < 0) {
+           return -1;
+       }
+   }
+
+   return 1;
+}
+
 int qio_channel_readv_all(QIOChannel *ioc,
                           const struct iovec *iov,
                           size_t niov,
@@ -117,6 +156,24 @@ int qio_channel_readv_all(QIOChannel *ioc,
     return qio_channel_readv_full_all(ioc, iov, niov, NULL, NULL, errp);
 }
 
+int qio_channel_readv_peek_all(QIOChannel *ioc,
+                               const struct iovec *iov,
+                               size_t niov,
+                               Error **errp)
+{
+    int ret = qio_channel_readv_peek_all_eof(ioc, iov, niov, errp);
+
+    if (ret == 0) {
+        error_setg(errp, "Unexpected end-of-file before all data were read");
+        return -1;
+    }
+    if (ret == 1) {
+        return 0;
+    }
+
+    return ret;
+}
+
 int qio_channel_readv_full_all_eof(QIOChannel *ioc,
                                    const struct iovec *iov,
                                    size_t niov,
@@ -146,7 +203,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc,
     while ((nlocal_iov > 0) || local_fds) {
         ssize_t len;
         len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
-                                     local_nfds, errp);
+                                     local_nfds, 0, errp);
         if (len == QIO_CHANNEL_ERR_BLOCK) {
             if (qemu_in_coroutine()) {
                 qio_channel_yield(ioc, G_IO_IN);
@@ -284,7 +341,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc,
                           size_t niov,
                           Error **errp)
 {
-    return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp);
+    return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp);
 }
 
 
@@ -303,7 +360,7 @@ ssize_t qio_channel_read(QIOChannel *ioc,
                          Error **errp)
 {
     struct iovec iov = { .iov_base = buf, .iov_len = buflen };
-    return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp);
+    return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp);
 }
 
 
@@ -336,6 +393,14 @@ int qio_channel_read_all(QIOChannel *ioc,
     return qio_channel_readv_all(ioc, &iov, 1, errp);
 }
 
+int qio_channel_read_peek_all(QIOChannel *ioc,
+                              const char *buf,
+                              size_t buflen,
+                              Error **errp)
+{
+    struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
+    return qio_channel_readv_peek_all(ioc, &iov, 1, errp);
+}
 
 int qio_channel_write_all(QIOChannel *ioc,
                           const char *buf,
diff --git a/migration/channel-block.c b/migration/channel-block.c
index c55c8c93ce..0b0deeb919 100644
--- a/migration/channel-block.c
+++ b/migration/channel-block.c
@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc,
                         size_t niov,
                         int **fds,
                         size_t *nfds,
+                        int flags,
                         Error **errp)
 {
     QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc);
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index 196b78c00d..199227a556 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
         iov.iov_base = buf;
         iov.iov_len = sz;
         n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
-                                        &fds, &nfds, errp);
+                                        &fds, &nfds, 0, errp);
 
         if (n_read == QIO_CHANNEL_ERR_BLOCK) {
             qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c
index 2994d1cf42..3cf1acaf7d 100644
--- a/tests/qtest/tpm-emu.c
+++ b/tests/qtest/tpm-emu.c
@@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data)
         int *pfd = NULL;
         size_t nfd = 0;
 
-        qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort);
+        qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort);
         cmd = be32_to_cpu(cmd);
         g_assert_cmpint(cmd, ==, CMD_SET_DATAFD);
         g_assert_cmpint(nfd, ==, 1);
diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
index b36a5d972a..b964bb202d 100644
--- a/tests/unit/test-io-channel-socket.c
+++ b/tests/unit/test-io-channel-socket.c
@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void)
                            G_N_ELEMENTS(iorecv),
                            &fdrecv,
                            &nfdrecv,
+                           0,
                            &error_abort);
 
     g_assert(nfdrecv == G_N_ELEMENTS(fdsend));
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index 232984ace6..145eb17c08 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
          * qio_channel_readv_full may have short reads, keeping calling it
          * until getting VHOST_USER_HDR_SIZE or 0 bytes in total
          */
-        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
+        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err);
         if (rc < 0) {
             if (rc == QIO_CHANNEL_ERR_BLOCK) {
                 assert(local_err == NULL);
-- 
2.22.3




* [PATCH 2/2] migration: check magic value for deciding the mapping of channels
From: manish.mishra @ 2022-11-19  9:36 UTC (permalink / raw)
  To: qemu-devel
  Cc: berrange, peterx, prerna.saxena, quintela, dgilbert, lsoaresp,
	manish.mishra

Current logic assumes that channel connections on the destination side are
always established in the same order as on the source, and that the first
one will always be the main channel, followed by the multifd or post-copy
preemption channels. This may not always be true: even if a channel has a
connection established on the source side, it can still be pending on the
destination side while a newer connection gets established first, causing
an out-of-order mapping of channels on the destination side. Currently,
all channels except post-copy preempt send a magic number; this patch uses
that magic number to decide the type of channel. This logic applies only
to precopy (multifd) live migration because, as mentioned, the post-copy
preempt channel does not send any magic number. Also, TLS live migration
already performs the TLS handshake before creating other channels, so this
issue cannot occur with TLS; hence this logic is skipped for TLS live
migrations. This patch uses a peek read to check the magic number of
channels so that the current data/control stream management remains
unaffected.
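
A condensed sketch of the decision described above (the complete change is
in the diff below); 'ioc' and error propagation are assumed:

    uint32_t channel_magic = 0;
    Error *local_err = NULL;

    /* Peek the first 4 bytes; the main channel starts with QEMU_VM_FILE_MAGIC. */
    if (qio_channel_read_peek_all(ioc, (void *)&channel_magic,
                                  sizeof(channel_magic), &local_err) == 0) {
        bool default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
        /* default_channel: main migration stream; otherwise a multifd channel. */
    }
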
---
 migration/migration.c    | 44 +++++++++++++++++++++++++++++-----------
 migration/multifd.c      | 12 ++++-------
 migration/multifd.h      |  2 +-
 migration/postcopy-ram.c |  5 +----
 migration/postcopy-ram.h |  2 +-
 5 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 739bb683f3..787e678d48 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -733,31 +733,51 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
     Error *local_err = NULL;
-    bool start_migration;
     QEMUFile *f;
+    bool default_channel = true;
+    uint32_t channel_magic = 0;
+    int ret = 0;
 
-    if (!mis->from_src_file) {
-        /* The first connection (multifd may have multiple) */
+    if (migrate_use_multifd() && !migrate_postcopy_ram() &&
+        qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+        /*
+         * With multiple channels, it is possible that we receive channels
+         * out of order on destination side, causing incorrect mapping of
+         * source channels on destination side. Check channel MAGIC to
+         * decide type of channel. Please note this is best effort, postcopy
+         * preempt channel does not send any magic number so avoid it for
+         * postcopy live migration. Also tls live migration already does
+         * tls handshake while initializing main channel so with tls this
+         * issue is not possible.
+         */
+        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
+                                        sizeof(channel_magic), &local_err);
+
+        if (ret != 0) {
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
+    } else {
+        default_channel = !mis->from_src_file;
+    }
+
+    if (default_channel) {
         f = qemu_file_new_input(ioc);
 
         if (!migration_incoming_setup(f, errp)) {
             return;
         }
-
-        /*
-         * Common migration only needs one channel, so we can start
-         * right now.  Some features need more than one channel, we wait.
-         */
-        start_migration = !migration_needs_multiple_sockets();
     } else {
         /* Multiple connections */
         assert(migration_needs_multiple_sockets());
         if (migrate_use_multifd()) {
-            start_migration = multifd_recv_new_channel(ioc, &local_err);
+            multifd_recv_new_channel(ioc, &local_err);
         } else {
             assert(migrate_postcopy_preempt());
             f = qemu_file_new_input(ioc);
-            start_migration = postcopy_preempt_new_channel(mis, f);
+            postcopy_preempt_new_channel(mis, f);
         }
         if (local_err) {
             error_propagate(errp, local_err);
@@ -765,7 +785,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
         }
     }
 
-    if (start_migration) {
+    if (migration_has_all_channels()) {
         /* If it's a recovery, we're done */
         if (postcopy_try_recover()) {
             return;
diff --git a/migration/multifd.c b/migration/multifd.c
index 586ddc9d65..be86a4d07f 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -1220,11 +1220,9 @@ bool multifd_recv_all_channels_created(void)
 
 /*
  * Try to receive all multifd channels to get ready for the migration.
- * - Return true and do not set @errp when correctly receiving all channels;
- * - Return false and do not set @errp when correctly receiving the current one;
- * - Return false and set @errp when failing to receive the current channel.
+ * Sets @errp when failing to receive the current channel.
  */
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
 {
     MultiFDRecvParams *p;
     Error *local_err = NULL;
@@ -1237,7 +1235,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
                                 "failed to receive packet"
                                 " via multifd channel %d: ",
                                 qatomic_read(&multifd_recv_state->count));
-        return false;
+        return;
     }
     trace_multifd_recv_new_channel(id);
 
@@ -1247,7 +1245,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
                    id);
         multifd_recv_terminate_threads(local_err);
         error_propagate(errp, local_err);
-        return false;
+        return;
     }
     p->c = ioc;
     object_ref(OBJECT(ioc));
@@ -1258,6 +1256,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
     qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
                        QEMU_THREAD_JOINABLE);
     qatomic_inc(&multifd_recv_state->count);
-    return qatomic_read(&multifd_recv_state->count) ==
-           migrate_multifd_channels();
 }
diff --git a/migration/multifd.h b/migration/multifd.h
index 519f498643..913e4ba274 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -18,7 +18,7 @@ void multifd_save_cleanup(void);
 int multifd_load_setup(Error **errp);
 int multifd_load_cleanup(Error **errp);
 bool multifd_recv_all_channels_created(void);
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
 void multifd_recv_sync_main(void);
 int multifd_send_sync_main(QEMUFile *f);
 int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index b9a37ef255..f84f783ab4 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1539,7 +1539,7 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
     }
 }
 
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
 {
     /*
      * The new loading channel has its own threads, so it needs to be
@@ -1548,9 +1548,6 @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
     qemu_file_set_blocking(file, true);
     mis->postcopy_qemufile_dst = file;
     trace_postcopy_preempt_new_channel();
-
-    /* Start the migration immediately */
-    return true;
 }
 
 /*
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 6147bf7d1d..25881c4127 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -190,7 +190,7 @@ enum PostcopyChannels {
     RAM_CHANNEL_MAX,
 };
 
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
 int postcopy_preempt_setup(MigrationState *s, Error **errp);
 int postcopy_preempt_wait_channel(MigrationState *s);
 
-- 
2.22.3




* check magic value for deciding the mapping of channels
From: manish.mishra @ 2022-11-19  9:36 UTC (permalink / raw)
  To: qemu-devel
  Cc: berrange, peterx, prerna.saxena, quintela, dgilbert, lsoaresp,
	manish.mishra

Current logic assumes that channel connections on the destination side are
always established in the same order as on the source, and that the first
one will always be the main channel, followed by the multifd or post-copy
preemption channels. This may not always be true: even if a channel has a
connection established on the source side, it can still be pending on the
destination side while a newer connection gets established first, causing
an out-of-order mapping of channels on the destination side. Currently,
all channels except post-copy preempt send a magic number; this patch uses
that magic number to decide the type of channel. This logic applies only
to precopy (multifd) live migration because, as mentioned, the post-copy
preempt channel does not send any magic number. Also, TLS live migration
already performs the TLS handshake before creating other channels, so this
issue cannot occur with TLS; hence this logic is skipped for TLS live
migrations. This patch uses MSG_PEEK to check the magic number of
channels so that the current data/control stream management remains
unaffected.

Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>

v2:
  TLS does not support MSG_PEEK, so v1 was broken for TLS live
  migrations. For TLS live migration, the TLS handshake is done while
  initializing the main channel, before any other channels can be
  created, so this issue cannot occur with TLS. v2 adds a check to skip
  the magic-number check for TLS live migration and falls back to the
  older method of deciding the mapping of channels on the destination
  side.

v3:
  1. Split the change into two patches: an io patch for the read-peek
     routines and a migration patch for the migration-related changes.
  2. Add a 'flags' argument to the io_readv calls to pass extra read
     flags such as MSG_PEEK.
  3. Some other minor fixes.
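
The v2 fallback works because only socket channels advertise the new
feature bit; a condensed form of the guard from patch 2/2 is sketched
below (names taken from the diffs; 'channel_magic' is assumed to already
hold the peeked bytes):

    bool default_channel;

    if (migrate_use_multifd() && !migrate_postcopy_ram() &&
        qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
        /* Socket channel: use the peeked magic number to classify it. */
        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
    } else {
        /* TLS or postcopy-preempt: fall back to arrival-order mapping. */
        default_channel = !mis->from_src_file;
    }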

manish.mishra (2):
  io: Add support for MSG_PEEK for socket channel
  migration: check magic value for deciding the mapping of channels

 chardev/char-socket.c               |  4 +-
 include/io/channel.h                | 83 +++++++++++++++++++++++++++++
 io/channel-buffer.c                 |  1 +
 io/channel-command.c                |  1 +
 io/channel-file.c                   |  1 +
 io/channel-null.c                   |  1 +
 io/channel-socket.c                 | 16 +++++-
 io/channel-tls.c                    |  1 +
 io/channel-websock.c                |  1 +
 io/channel.c                        | 73 +++++++++++++++++++++++--
 migration/channel-block.c           |  1 +
 migration/migration.c               | 44 ++++++++++-----
 migration/multifd.c                 | 12 ++---
 migration/multifd.h                 |  2 +-
 migration/postcopy-ram.c            |  5 +-
 migration/postcopy-ram.h            |  2 +-
 scsi/qemu-pr-helper.c               |  2 +-
 tests/qtest/tpm-emu.c               |  2 +-
 tests/unit/test-io-channel-socket.c |  1 +
 util/vhost-user-server.c            |  2 +-
 20 files changed, 218 insertions(+), 37 deletions(-)

-- 
2.22.3




* [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
From: manish.mishra @ 2022-11-19  9:36 UTC (permalink / raw)
  To: qemu-devel
  Cc: berrange, peterx, prerna.saxena, quintela, dgilbert, lsoaresp,
	manish.mishra

MSG_PEEK peeks at incoming data on the channel: the data is treated as
unread and the next read will still return it. This support is currently
added only for the socket class. An extra parameter 'flags' is added to
the io_readv calls to pass extra read flags such as MSG_PEEK.

Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
---
 chardev/char-socket.c               |  4 +-
 include/io/channel.h                | 83 +++++++++++++++++++++++++++++
 io/channel-buffer.c                 |  1 +
 io/channel-command.c                |  1 +
 io/channel-file.c                   |  1 +
 io/channel-null.c                   |  1 +
 io/channel-socket.c                 | 16 +++++-
 io/channel-tls.c                    |  1 +
 io/channel-websock.c                |  1 +
 io/channel.c                        | 73 +++++++++++++++++++++++--
 migration/channel-block.c           |  1 +
 scsi/qemu-pr-helper.c               |  2 +-
 tests/qtest/tpm-emu.c               |  2 +-
 tests/unit/test-io-channel-socket.c |  1 +
 util/vhost-user-server.c            |  2 +-
 15 files changed, 179 insertions(+), 11 deletions(-)

diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 879564aa8a..5afce9a464 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len)
     if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
         ret = qio_channel_readv_full(s->ioc, &iov, 1,
                                      &msgfds, &msgfds_num,
-                                     NULL);
+                                     0, NULL);
     } else {
         ret = qio_channel_readv_full(s->ioc, &iov, 1,
                                      NULL, NULL,
-                                     NULL);
+                                     0, NULL);
     }
 
     if (msgfds_num) {
diff --git a/include/io/channel.h b/include/io/channel.h
index c680ee7480..cbcde4b88f 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
 
 #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
 
+#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1
+
 typedef enum QIOChannelFeature QIOChannelFeature;
 
 enum QIOChannelFeature {
@@ -41,6 +43,7 @@ enum QIOChannelFeature {
     QIO_CHANNEL_FEATURE_SHUTDOWN,
     QIO_CHANNEL_FEATURE_LISTEN,
     QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
+    QIO_CHANNEL_FEATURE_READ_MSG_PEEK,
 };
 
 
@@ -114,6 +117,7 @@ struct QIOChannelClass {
                         size_t niov,
                         int **fds,
                         size_t *nfds,
+                        int flags,
                         Error **errp);
     int (*io_close)(QIOChannel *ioc,
                     Error **errp);
@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc,
  * @niov: the length of the @iov array
  * @fds: pointer to an array that will received file handles
  * @nfds: pointer filled with number of elements in @fds on return
+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*)
  * @errp: pointer to a NULL-initialized error object
  *
  * Read data from the IO channel, storing it in the
@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
                                size_t niov,
                                int **fds,
                                size_t *nfds,
+                               int flags,
                                Error **errp);
 
 
@@ -300,6 +306,34 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
                               size_t niov,
                               Error **errp);
 
+/**
+ * qio_channel_readv_peek_all_eof:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to read data into
+ * @niov: the length of the @iov array
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Read data from the peek of IO channel without
+ * actually removing it from channel buffer, storing
+ * it in the memory regions referenced by @iov. Each
+ * element in the @iov will be fully populated with
+ * data before the next one is used. The @niov
+ * parameter specifies the total number of elements
+ * in @iov.
+ *
+ * The function will wait for all requested data
+ * to be read, yielding from the current coroutine
+ * if required.
+ *
+ * Returns: 1 if all bytes were read, 0 if end-of-file
+ *          occurs without data, or -1 on error
+ */
+int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
+                                   const struct iovec *iov,
+                                   size_t niov,
+                                   Error **errp);
+
+
 /**
  * qio_channel_readv_all:
  * @ioc: the channel object
@@ -328,6 +362,34 @@ int qio_channel_readv_all(QIOChannel *ioc,
                           Error **errp);
 
 
+/**
+ * qio_channel_readv_peek_all:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to read data into
+ * @niov: the length of the @iov array
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Read data from the peek of the IO channel without
+ * removing from channel buffer, storing it in the
+ * memory regions referenced by @iov. Each element
+ * in the @iov will be fully populated with data
+ * before the next one is used. The @niov parameter
+ * specifies the total number of elements in @iov.
+ *
+ * The function will wait for all requested data
+ * to be read, yielding from the current coroutine
+ * if required.
+ *
+ * If end-of-file occurs before all requested data
+ * has been read, an error will be reported.
+ *
+ * Returns: 0 if all bytes were read, or -1 on error
+ */
+int qio_channel_readv_peek_all(QIOChannel *ioc,
+                               const struct iovec *iov,
+                               size_t niov,
+                               Error **errp);
+
 /**
  * qio_channel_writev_all:
  * @ioc: the channel object
@@ -456,6 +518,27 @@ int qio_channel_read_all(QIOChannel *ioc,
                          size_t buflen,
                          Error **errp);
 
+/**
+ * qio_channel_read_peek_all:
+ * @ioc: the channel object
+ * @buf: the memory region to read data into
+ * @buflen: the number of bytes to read into @buf
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Reads @buflen bytes from the peek of channel into @buf without
+ * removing it from channel buffer, possibly blocking or (if the
+ * channel is non-blocking) yielding from the current coroutine
+ * multiple times until the entire content is read. If end-of-file
+ * occurs it will return an error rather than a short-read. Otherwise
+ * behaves as qio_channel_read().
+ *
+ * Returns: 0 if all bytes were read, or -1 on error
+ */
+int qio_channel_read_peek_all(QIOChannel *ioc,
+                              const char *buf,
+                              size_t buflen,
+                              Error **errp);
+
 /**
  * qio_channel_write_all:
  * @ioc: the channel object
diff --git a/io/channel-buffer.c b/io/channel-buffer.c
index bf52011be2..8096180f85 100644
--- a/io/channel-buffer.c
+++ b/io/channel-buffer.c
@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc,
                                         size_t niov,
                                         int **fds,
                                         size_t *nfds,
+                                        int flags,
                                         Error **errp)
 {
     QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
diff --git a/io/channel-command.c b/io/channel-command.c
index 74516252ba..e7edd091af 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc,
                                          size_t niov,
                                          int **fds,
                                          size_t *nfds,
+                                         int flags,
                                          Error **errp)
 {
     QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
diff --git a/io/channel-file.c b/io/channel-file.c
index b67687c2aa..d76663e6ae 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc,
                                       size_t niov,
                                       int **fds,
                                       size_t *nfds,
+                                      int flags,
                                       Error **errp)
 {
     QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
diff --git a/io/channel-null.c b/io/channel-null.c
index 75e3781507..4fafdb770d 100644
--- a/io/channel-null.c
+++ b/io/channel-null.c
@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc,
                        size_t niov,
                        int **fds G_GNUC_UNUSED,
                        size_t *nfds G_GNUC_UNUSED,
+                       int flags,
                        Error **errp)
 {
     QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc);
diff --git a/io/channel-socket.c b/io/channel-socket.c
index b76dca9cc1..a06b24766d 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
     }
 #endif /* WIN32 */
 
+    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
+
     trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd);
     return cioc;
 
@@ -496,6 +498,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
                                         size_t niov,
                                         int **fds,
                                         size_t *nfds,
+                                        int flags,
                                         Error **errp)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
@@ -517,6 +520,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
 
     }
 
+    if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
+        sflags |= MSG_PEEK;
+    }
+
  retry:
     ret = recvmsg(sioc->fd, &msg, sflags);
     if (ret < 0) {
@@ -624,11 +631,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
                                         size_t niov,
                                         int **fds,
                                         size_t *nfds,
+                                        int flags,
                                         Error **errp)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
     ssize_t done = 0;
     ssize_t i;
+    int sflags = 0;
+
+    if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
+        sflags |= MSG_PEEK;
+    }
 
     for (i = 0; i < niov; i++) {
         ssize_t ret;
@@ -636,7 +649,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
         ret = recv(sioc->fd,
                    iov[i].iov_base,
                    iov[i].iov_len,
-                   0);
+                   sflags);
         if (ret < 0) {
             if (errno == EAGAIN) {
                 if (done) {
@@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 }
 #endif /* WIN32 */
 
-
 #ifdef QEMU_MSG_ZEROCOPY
 static int qio_channel_socket_flush(QIOChannel *ioc,
                                     Error **errp)
diff --git a/io/channel-tls.c b/io/channel-tls.c
index 4ce890a538..c730cb8ec5 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc,
                                      size_t niov,
                                      int **fds,
                                      size_t *nfds,
+                                     int flags,
                                      Error **errp)
 {
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
diff --git a/io/channel-websock.c b/io/channel-websock.c
index fb4932ade7..a12acc27cf 100644
--- a/io/channel-websock.c
+++ b/io/channel-websock.c
@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc,
                                          size_t niov,
                                          int **fds,
                                          size_t *nfds,
+                                         int flags,
                                          Error **errp)
 {
     QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
diff --git a/io/channel.c b/io/channel.c
index 0640941ac5..23c8752918 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
                                size_t niov,
                                int **fds,
                                size_t *nfds,
+                               int flags,
                                Error **errp)
 {
     QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
         return -1;
     }
 
-    return klass->io_readv(ioc, iov, niov, fds, nfds, errp);
+    if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) &&
+        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+        error_setg_errno(errp, EINVAL,
+                         "Channel does not support peek read");
+        return -1;
+    }
+
+    return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp);
 }
 
 
@@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
     return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
 }
 
+int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
+                                   const struct iovec *iov,
+                                   size_t niov,
+                                   Error **errp)
+{
+   ssize_t len = 0;
+   ssize_t total = iov_size(iov, niov);
+
+   while (len < total) {
+       len = qio_channel_readv_full(ioc, iov, niov, NULL,
+                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
+
+       if (len == QIO_CHANNEL_ERR_BLOCK) {
+            if (qemu_in_coroutine()) {
+                qio_channel_yield(ioc, G_IO_IN);
+            } else {
+                qio_channel_wait(ioc, G_IO_IN);
+            }
+            continue;
+       }
+       if (len == 0) {
+           return 0;
+       }
+       if (len < 0) {
+           return -1;
+       }
+   }
+
+   return 1;
+}
+
 int qio_channel_readv_all(QIOChannel *ioc,
                           const struct iovec *iov,
                           size_t niov,
@@ -117,6 +156,24 @@ int qio_channel_readv_all(QIOChannel *ioc,
     return qio_channel_readv_full_all(ioc, iov, niov, NULL, NULL, errp);
 }
 
+int qio_channel_readv_peek_all(QIOChannel *ioc,
+                               const struct iovec *iov,
+                               size_t niov,
+                               Error **errp)
+{
+    int ret = qio_channel_readv_peek_all_eof(ioc, iov, niov, errp);
+
+    if (ret == 0) {
+        error_setg(errp, "Unexpected end-of-file before all data were read");
+        return -1;
+    }
+    if (ret == 1) {
+        return 0;
+    }
+
+    return ret;
+}
+
 int qio_channel_readv_full_all_eof(QIOChannel *ioc,
                                    const struct iovec *iov,
                                    size_t niov,
@@ -146,7 +203,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc,
     while ((nlocal_iov > 0) || local_fds) {
         ssize_t len;
         len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
-                                     local_nfds, errp);
+                                     local_nfds, 0, errp);
         if (len == QIO_CHANNEL_ERR_BLOCK) {
             if (qemu_in_coroutine()) {
                 qio_channel_yield(ioc, G_IO_IN);
@@ -284,7 +341,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc,
                           size_t niov,
                           Error **errp)
 {
-    return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp);
+    return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp);
 }
 
 
@@ -303,7 +360,7 @@ ssize_t qio_channel_read(QIOChannel *ioc,
                          Error **errp)
 {
     struct iovec iov = { .iov_base = buf, .iov_len = buflen };
-    return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp);
+    return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp);
 }
 
 
@@ -336,6 +393,14 @@ int qio_channel_read_all(QIOChannel *ioc,
     return qio_channel_readv_all(ioc, &iov, 1, errp);
 }
 
+int qio_channel_read_peek_all(QIOChannel *ioc,
+                              const char *buf,
+                              size_t buflen,
+                              Error **errp)
+{
+    struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
+    return qio_channel_readv_peek_all(ioc, &iov, 1, errp);
+}
 
 int qio_channel_write_all(QIOChannel *ioc,
                           const char *buf,
diff --git a/migration/channel-block.c b/migration/channel-block.c
index c55c8c93ce..0b0deeb919 100644
--- a/migration/channel-block.c
+++ b/migration/channel-block.c
@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc,
                         size_t niov,
                         int **fds,
                         size_t *nfds,
+                        int flags,
                         Error **errp)
 {
     QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc);
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index 196b78c00d..199227a556 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
         iov.iov_base = buf;
         iov.iov_len = sz;
         n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
-                                        &fds, &nfds, errp);
+                                        &fds, &nfds, 0, errp);
 
         if (n_read == QIO_CHANNEL_ERR_BLOCK) {
             qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c
index 2994d1cf42..3cf1acaf7d 100644
--- a/tests/qtest/tpm-emu.c
+++ b/tests/qtest/tpm-emu.c
@@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data)
         int *pfd = NULL;
         size_t nfd = 0;
 
-        qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort);
+        qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort);
         cmd = be32_to_cpu(cmd);
         g_assert_cmpint(cmd, ==, CMD_SET_DATAFD);
         g_assert_cmpint(nfd, ==, 1);
diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
index b36a5d972a..b964bb202d 100644
--- a/tests/unit/test-io-channel-socket.c
+++ b/tests/unit/test-io-channel-socket.c
@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void)
                            G_N_ELEMENTS(iorecv),
                            &fdrecv,
                            &nfdrecv,
+                           0,
                            &error_abort);
 
     g_assert(nfdrecv == G_N_ELEMENTS(fdsend));
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index 232984ace6..145eb17c08 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
          * qio_channel_readv_full may have short reads, keeping calling it
          * until getting VHOST_USER_HDR_SIZE or 0 bytes in total
          */
-        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
+        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err);
         if (rc < 0) {
             if (rc == QIO_CHANNEL_ERR_BLOCK) {
                 assert(local_err == NULL);
-- 
2.22.3




* [PATCH v3 2/2] migration: check magic value for deciding the mapping of channels
From: manish.mishra @ 2022-11-19  9:36 UTC (permalink / raw)
  To: qemu-devel
  Cc: berrange, peterx, prerna.saxena, quintela, dgilbert, lsoaresp,
	manish.mishra

Current logic assumes that channel connections on the destination side are
always established in the same order as on the source, and that the first
one will always be the main channel, followed by the multifd or post-copy
preemption channels. This may not always be true: even if a channel has a
connection established on the source side, it can still be pending on the
destination side while a newer connection gets established first, causing
an out-of-order mapping of channels on the destination side. Currently,
all channels except post-copy preempt send a magic number; this patch uses
that magic number to decide the type of channel. This logic applies only
to precopy (multifd) live migration because, as mentioned, the post-copy
preempt channel does not send any magic number. Also, TLS live migration
already performs the TLS handshake before creating other channels, so this
issue cannot occur with TLS; hence this logic is skipped for TLS live
migrations. This patch uses a peek read to check the magic number of
channels so that the current data/control stream management remains
unaffected.

Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
---
 migration/migration.c    | 44 +++++++++++++++++++++++++++++-----------
 migration/multifd.c      | 12 ++++-------
 migration/multifd.h      |  2 +-
 migration/postcopy-ram.c |  5 +----
 migration/postcopy-ram.h |  2 +-
 5 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 739bb683f3..787e678d48 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -733,31 +733,51 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
     Error *local_err = NULL;
-    bool start_migration;
     QEMUFile *f;
+    bool default_channel = true;
+    uint32_t channel_magic = 0;
+    int ret = 0;
 
-    if (!mis->from_src_file) {
-        /* The first connection (multifd may have multiple) */
+    if (migrate_use_multifd() && !migrate_postcopy_ram() &&
+        qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+        /*
+         * With multiple channels, it is possible that we receive channels
+         * out of order on destination side, causing incorrect mapping of
+         * source channels on destination side. Check channel MAGIC to
+         * decide type of channel. Please note this is best effort, postcopy
+         * preempt channel does not send any magic number so avoid it for
+         * postcopy live migration. Also tls live migration already does
+         * tls handshake while initializing main channel so with tls this
+         * issue is not possible.
+         */
+        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
+                                        sizeof(channel_magic), &local_err);
+
+        if (ret != 0) {
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
+    } else {
+        default_channel = !mis->from_src_file;
+    }
+
+    if (default_channel) {
         f = qemu_file_new_input(ioc);
 
         if (!migration_incoming_setup(f, errp)) {
             return;
         }
-
-        /*
-         * Common migration only needs one channel, so we can start
-         * right now.  Some features need more than one channel, we wait.
-         */
-        start_migration = !migration_needs_multiple_sockets();
     } else {
         /* Multiple connections */
         assert(migration_needs_multiple_sockets());
         if (migrate_use_multifd()) {
-            start_migration = multifd_recv_new_channel(ioc, &local_err);
+            multifd_recv_new_channel(ioc, &local_err);
         } else {
             assert(migrate_postcopy_preempt());
             f = qemu_file_new_input(ioc);
-            start_migration = postcopy_preempt_new_channel(mis, f);
+            postcopy_preempt_new_channel(mis, f);
         }
         if (local_err) {
             error_propagate(errp, local_err);
@@ -765,7 +785,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
         }
     }
 
-    if (start_migration) {
+    if (migration_has_all_channels()) {
         /* If it's a recovery, we're done */
         if (postcopy_try_recover()) {
             return;
diff --git a/migration/multifd.c b/migration/multifd.c
index 586ddc9d65..be86a4d07f 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -1220,11 +1220,9 @@ bool multifd_recv_all_channels_created(void)
 
 /*
  * Try to receive all multifd channels to get ready for the migration.
- * - Return true and do not set @errp when correctly receiving all channels;
- * - Return false and do not set @errp when correctly receiving the current one;
- * - Return false and set @errp when failing to receive the current channel.
+ * Sets @errp when failing to receive the current channel.
  */
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
 {
     MultiFDRecvParams *p;
     Error *local_err = NULL;
@@ -1237,7 +1235,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
                                 "failed to receive packet"
                                 " via multifd channel %d: ",
                                 qatomic_read(&multifd_recv_state->count));
-        return false;
+        return;
     }
     trace_multifd_recv_new_channel(id);
 
@@ -1247,7 +1245,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
                    id);
         multifd_recv_terminate_threads(local_err);
         error_propagate(errp, local_err);
-        return false;
+        return;
     }
     p->c = ioc;
     object_ref(OBJECT(ioc));
@@ -1258,6 +1256,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
     qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
                        QEMU_THREAD_JOINABLE);
     qatomic_inc(&multifd_recv_state->count);
-    return qatomic_read(&multifd_recv_state->count) ==
-           migrate_multifd_channels();
 }
diff --git a/migration/multifd.h b/migration/multifd.h
index 519f498643..913e4ba274 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -18,7 +18,7 @@ void multifd_save_cleanup(void);
 int multifd_load_setup(Error **errp);
 int multifd_load_cleanup(Error **errp);
 bool multifd_recv_all_channels_created(void);
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
 void multifd_recv_sync_main(void);
 int multifd_send_sync_main(QEMUFile *f);
 int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index b9a37ef255..f84f783ab4 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1539,7 +1539,7 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
     }
 }
 
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
 {
     /*
      * The new loading channel has its own threads, so it needs to be
@@ -1548,9 +1548,6 @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
     qemu_file_set_blocking(file, true);
     mis->postcopy_qemufile_dst = file;
     trace_postcopy_preempt_new_channel();
-
-    /* Start the migration immediately */
-    return true;
 }
 
 /*
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 6147bf7d1d..25881c4127 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -190,7 +190,7 @@ enum PostcopyChannels {
     RAM_CHANNEL_MAX,
 };
 
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
 int postcopy_preempt_setup(MigrationState *s, Error **errp);
 int postcopy_preempt_wait_channel(MigrationState *s);
 
-- 
2.22.3


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-19  9:36 [PATCH 1/2] io: Add support for MSG_PEEK for socket channel manish.mishra
                   ` (3 preceding siblings ...)
  2022-11-19  9:36 ` [PATCH v3 2/2] migration: check magic value for deciding the mapping of channels manish.mishra
@ 2022-11-19  9:40 ` manish.mishra
  4 siblings, 0 replies; 24+ messages in thread
From: manish.mishra @ 2022-11-19  9:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: berrange, peterx, prerna.saxena, quintela, dgilbert, lsoaresp


On 19/11/22 3:06 pm, manish.mishra wrote:
> MSG_PEEK reads from the peek of channel, The data is treated as
> unread and the next read shall still return this data. This
> support is currently added only for socket class. Extra parameter
> 'flags' is added to io_readv calls to pass extra read flags like
> MSG_PEEK.
> ---
>   chardev/char-socket.c               |  4 +-
>   include/io/channel.h                | 83 +++++++++++++++++++++++++++++
>   io/channel-buffer.c                 |  1 +
>   io/channel-command.c                |  1 +
>   io/channel-file.c                   |  1 +
>   io/channel-null.c                   |  1 +
>   io/channel-socket.c                 | 16 +++++-
>   io/channel-tls.c                    |  1 +
>   io/channel-websock.c                |  1 +
>   io/channel.c                        | 73 +++++++++++++++++++++++--
>   migration/channel-block.c           |  1 +
>   scsi/qemu-pr-helper.c               |  2 +-
>   tests/qtest/tpm-emu.c               |  2 +-
>   tests/unit/test-io-channel-socket.c |  1 +
>   util/vhost-user-server.c            |  2 +-
>   15 files changed, 179 insertions(+), 11 deletions(-)
>
> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> index 879564aa8a..5afce9a464 100644
> --- a/chardev/char-socket.c
> +++ b/chardev/char-socket.c
> @@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len)
>       if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
>           ret = qio_channel_readv_full(s->ioc, &iov, 1,
>                                        &msgfds, &msgfds_num,
> -                                     NULL);
> +                                     0, NULL);
>       } else {
>           ret = qio_channel_readv_full(s->ioc, &iov, 1,
>                                        NULL, NULL,
> -                                     NULL);
> +                                     0, NULL);
>       }
>   
>       if (msgfds_num) {
> diff --git a/include/io/channel.h b/include/io/channel.h
> index c680ee7480..cbcde4b88f 100644
> --- a/include/io/channel.h
> +++ b/include/io/channel.h
> @@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
>   
>   #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
>   
> +#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1
> +
>   typedef enum QIOChannelFeature QIOChannelFeature;
>   
>   enum QIOChannelFeature {
> @@ -41,6 +43,7 @@ enum QIOChannelFeature {
>       QIO_CHANNEL_FEATURE_SHUTDOWN,
>       QIO_CHANNEL_FEATURE_LISTEN,
>       QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
> +    QIO_CHANNEL_FEATURE_READ_MSG_PEEK,
>   };
>   
>   
> @@ -114,6 +117,7 @@ struct QIOChannelClass {
>                           size_t niov,
>                           int **fds,
>                           size_t *nfds,
> +                        int flags,
>                           Error **errp);
>       int (*io_close)(QIOChannel *ioc,
>                       Error **errp);
> @@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc,
>    * @niov: the length of the @iov array
>    * @fds: pointer to an array that will received file handles
>    * @nfds: pointer filled with number of elements in @fds on return
> + * @flags: read flags (QIO_CHANNEL_READ_FLAG_*)
>    * @errp: pointer to a NULL-initialized error object
>    *
>    * Read data from the IO channel, storing it in the
> @@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
>                                  size_t niov,
>                                  int **fds,
>                                  size_t *nfds,
> +                               int flags,
>                                  Error **errp);
>   
>   
> @@ -300,6 +306,34 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
>                                 size_t niov,
>                                 Error **errp);
>   
> +/**
> + * qio_channel_readv_peek_all_eof:
> + * @ioc: the channel object
> + * @iov: the array of memory regions to read data into
> + * @niov: the length of the @iov array
> + * @errp: pointer to a NULL-initialized error object
> + *
> + * Read data from the peek of IO channel without
> + * actually removing it from channel buffer, storing
> + * it in the memory regions referenced by @iov. Each
> + * element in the @iov will be fully populated with
> + * data before the next one is used. The @niov
> + * parameter specifies the total number of elements
> + * in @iov.
> + *
> + * The function will wait for all requested data
> + * to be read, yielding from the current coroutine
> + * if required.
> + *
> + * Returns: 1 if all bytes were read, 0 if end-of-file
> + *          occurs without data, or -1 on error
> + */
> +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> +                                   const struct iovec *iov,
> +                                   size_t niov,
> +                                   Error **errp);
> +
> +
>   /**
>    * qio_channel_readv_all:
>    * @ioc: the channel object
> @@ -328,6 +362,34 @@ int qio_channel_readv_all(QIOChannel *ioc,
>                             Error **errp);
>   
>   
> +/**
> + * qio_channel_readv_peek_all:
> + * @ioc: the channel object
> + * @iov: the array of memory regions to read data into
> + * @niov: the length of the @iov array
> + * @errp: pointer to a NULL-initialized error object
> + *
> + * Read data from the the peek of IO channel without
> + * removing from channel buffer, storing it in the
> + * memory regions referenced by @iov. Each element
> + * in the @iov will be fully populated with data
> + * before the next one is used. The @niov parameter
> + * specifies the total number of elements in @iov.
> + *
> + * The function will wait for all requested data
> + * to be read, yielding from the current coroutine
> + * if required.
> + *
> + * If end-of-file occurs before all requested data
> + * has been read, an error will be reported.
> + *
> + * Returns: 0 if all bytes were read, or -1 on error
> + */
> +int qio_channel_readv_peek_all(QIOChannel *ioc,
> +                               const struct iovec *iov,
> +                               size_t niov,
> +                               Error **errp);
> +
>   /**
>    * qio_channel_writev_all:
>    * @ioc: the channel object
> @@ -456,6 +518,27 @@ int qio_channel_read_all(QIOChannel *ioc,
>                            size_t buflen,
>                            Error **errp);
>   
> +/**
> + * qio_channel_read_peek_all:
> + * @ioc: the channel object
> + * @buf: the memory region to read data into
> + * @buflen: the number of bytes to @buf
> + * @errp: pointer to a NULL-initialized error object
> + *
> + * Reads @buflen bytes from the peek of channel into @buf without
> + * removing it from channel buffer, possibly blocking or (if the
> + * channel is non-blocking) yielding from the current coroutine
> + * multiple times until the entire content is read. If end-of-file
> + * occurs it will return an error rather than a short-read. Otherwise
> + * behaves as qio_channel_read().
> + *
> + * Returns: 0 if all bytes were read, or -1 on error
> + */
> +int qio_channel_read_peek_all(QIOChannel *ioc,
> +                              const char *buf,
> +                              size_t buflen,
> +                              Error **errp);
> +
>   /**
>    * qio_channel_write_all:
>    * @ioc: the channel object
> diff --git a/io/channel-buffer.c b/io/channel-buffer.c
> index bf52011be2..8096180f85 100644
> --- a/io/channel-buffer.c
> +++ b/io/channel-buffer.c
> @@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc,
>                                           size_t niov,
>                                           int **fds,
>                                           size_t *nfds,
> +                                        int flags,
>                                           Error **errp)
>   {
>       QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
> diff --git a/io/channel-command.c b/io/channel-command.c
> index 74516252ba..e7edd091af 100644
> --- a/io/channel-command.c
> +++ b/io/channel-command.c
> @@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc,
>                                            size_t niov,
>                                            int **fds,
>                                            size_t *nfds,
> +                                         int flags,
>                                            Error **errp)
>   {
>       QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
> diff --git a/io/channel-file.c b/io/channel-file.c
> index b67687c2aa..d76663e6ae 100644
> --- a/io/channel-file.c
> +++ b/io/channel-file.c
> @@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc,
>                                         size_t niov,
>                                         int **fds,
>                                         size_t *nfds,
> +                                      int flags,
>                                         Error **errp)
>   {
>       QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
> diff --git a/io/channel-null.c b/io/channel-null.c
> index 75e3781507..4fafdb770d 100644
> --- a/io/channel-null.c
> +++ b/io/channel-null.c
> @@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc,
>                          size_t niov,
>                          int **fds G_GNUC_UNUSED,
>                          size_t *nfds G_GNUC_UNUSED,
> +                       int flags,
>                          Error **errp)
>   {
>       QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc);
> diff --git a/io/channel-socket.c b/io/channel-socket.c
> index b76dca9cc1..a06b24766d 100644
> --- a/io/channel-socket.c
> +++ b/io/channel-socket.c
> @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
>       }
>   #endif /* WIN32 */
>   
> +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
> +
>       trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd);
>       return cioc;
>   
> @@ -496,6 +498,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
>                                           size_t niov,
>                                           int **fds,
>                                           size_t *nfds,
> +                                        int flags,
>                                           Error **errp)
>   {
>       QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> @@ -517,6 +520,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
>   
>       }
>   
> +    if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
> +        sflags |= MSG_PEEK;
> +    }
> +
>    retry:
>       ret = recvmsg(sioc->fd, &msg, sflags);
>       if (ret < 0) {
> @@ -624,11 +631,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
>                                           size_t niov,
>                                           int **fds,
>                                           size_t *nfds,
> +                                        int flags,
>                                           Error **errp)
>   {
>       QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
>       ssize_t done = 0;
>       ssize_t i;
> +    int sflags = 0;
> +
> +    if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
> +        sflags |= MSG_PEEK;
> +    }
>   
>       for (i = 0; i < niov; i++) {
>           ssize_t ret;
> @@ -636,7 +649,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
>           ret = recv(sioc->fd,
>                      iov[i].iov_base,
>                      iov[i].iov_len,
> -                   0);
> +                   sflags);
>           if (ret < 0) {
>               if (errno == EAGAIN) {
>                   if (done) {
> @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>   }
>   #endif /* WIN32 */
>   
> -
>   #ifdef QEMU_MSG_ZEROCOPY
>   static int qio_channel_socket_flush(QIOChannel *ioc,
>                                       Error **errp)
> diff --git a/io/channel-tls.c b/io/channel-tls.c
> index 4ce890a538..c730cb8ec5 100644
> --- a/io/channel-tls.c
> +++ b/io/channel-tls.c
> @@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc,
>                                        size_t niov,
>                                        int **fds,
>                                        size_t *nfds,
> +                                     int flags,
>                                        Error **errp)
>   {
>       QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
> diff --git a/io/channel-websock.c b/io/channel-websock.c
> index fb4932ade7..a12acc27cf 100644
> --- a/io/channel-websock.c
> +++ b/io/channel-websock.c
> @@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc,
>                                            size_t niov,
>                                            int **fds,
>                                            size_t *nfds,
> +                                         int flags,
>                                            Error **errp)
>   {
>       QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
> diff --git a/io/channel.c b/io/channel.c
> index 0640941ac5..23c8752918 100644
> --- a/io/channel.c
> +++ b/io/channel.c
> @@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
>                                  size_t niov,
>                                  int **fds,
>                                  size_t *nfds,
> +                               int flags,
>                                  Error **errp)
>   {
>       QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
> @@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
>           return -1;
>       }
>   
> -    return klass->io_readv(ioc, iov, niov, fds, nfds, errp);
> +    if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) &&
> +        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
> +        error_setg_errno(errp, EINVAL,
> +                         "Channel does not support peek read");
> +        return -1;
> +    }
> +
> +    return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp);
>   }
>   
>   
> @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
>       return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
>   }
>   
> +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> +                                   const struct iovec *iov,
> +                                   size_t niov,
> +                                   Error **errp)
> +{
> +   ssize_t len = 0;
> +   ssize_t total = iov_size(iov, niov);
> +
> +   while (len < total) {
> +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
> +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
> +
> +       if (len == QIO_CHANNEL_ERR_BLOCK) {
> +            if (qemu_in_coroutine()) {
> +                qio_channel_yield(ioc, G_IO_IN);
> +            } else {
> +                qio_channel_wait(ioc, G_IO_IN);
> +            }
> +            continue;
> +       }
> +       if (len == 0) {
> +           return 0;
> +       }
> +       if (len < 0) {
> +           return -1;
> +       }
> +   }
> +
> +   return 1;
> +}
> +
>   int qio_channel_readv_all(QIOChannel *ioc,
>                             const struct iovec *iov,
>                             size_t niov,
> @@ -117,6 +156,24 @@ int qio_channel_readv_all(QIOChannel *ioc,
>       return qio_channel_readv_full_all(ioc, iov, niov, NULL, NULL, errp);
>   }
>   
> +int qio_channel_readv_peek_all(QIOChannel *ioc,
> +                               const struct iovec *iov,
> +                               size_t niov,
> +                               Error **errp)
> +{
> +    int ret = qio_channel_readv_peek_all_eof(ioc, iov, niov, errp);
> +
> +    if (ret == 0) {
> +        error_setg(errp, "Unexpected end-of-file before all data were read");
> +        return -1;
> +    }
> +    if (ret == 1) {
> +        return 0;
> +    }
> +
> +    return ret;
> +}
> +
>   int qio_channel_readv_full_all_eof(QIOChannel *ioc,
>                                      const struct iovec *iov,
>                                      size_t niov,
> @@ -146,7 +203,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc,
>       while ((nlocal_iov > 0) || local_fds) {
>           ssize_t len;
>           len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
> -                                     local_nfds, errp);
> +                                     local_nfds, 0, errp);
>           if (len == QIO_CHANNEL_ERR_BLOCK) {
>               if (qemu_in_coroutine()) {
>                   qio_channel_yield(ioc, G_IO_IN);
> @@ -284,7 +341,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc,
>                             size_t niov,
>                             Error **errp)
>   {
> -    return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp);
> +    return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp);
>   }
>   
>   
> @@ -303,7 +360,7 @@ ssize_t qio_channel_read(QIOChannel *ioc,
>                            Error **errp)
>   {
>       struct iovec iov = { .iov_base = buf, .iov_len = buflen };
> -    return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp);
> +    return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp);
>   }
>   
>   
> @@ -336,6 +393,14 @@ int qio_channel_read_all(QIOChannel *ioc,
>       return qio_channel_readv_all(ioc, &iov, 1, errp);
>   }
>   
> +int qio_channel_read_peek_all(QIOChannel *ioc,
> +                              const char *buf,
> +                              size_t buflen,
> +                              Error **errp)
> +{
> +    struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
> +    return qio_channel_readv_peek_all(ioc, &iov, 1, errp);
> +}
>   
>   int qio_channel_write_all(QIOChannel *ioc,
>                             const char *buf,
> diff --git a/migration/channel-block.c b/migration/channel-block.c
> index c55c8c93ce..0b0deeb919 100644
> --- a/migration/channel-block.c
> +++ b/migration/channel-block.c
> @@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc,
>                           size_t niov,
>                           int **fds,
>                           size_t *nfds,
> +                        int flags,
>                           Error **errp)
>   {
>       QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc);
> diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
> index 196b78c00d..199227a556 100644
> --- a/scsi/qemu-pr-helper.c
> +++ b/scsi/qemu-pr-helper.c
> @@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
>           iov.iov_base = buf;
>           iov.iov_len = sz;
>           n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
> -                                        &fds, &nfds, errp);
> +                                        &fds, &nfds, 0, errp);
>   
>           if (n_read == QIO_CHANNEL_ERR_BLOCK) {
>               qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
> diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c
> index 2994d1cf42..3cf1acaf7d 100644
> --- a/tests/qtest/tpm-emu.c
> +++ b/tests/qtest/tpm-emu.c
> @@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data)
>           int *pfd = NULL;
>           size_t nfd = 0;
>   
> -        qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort);
> +        qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort);
>           cmd = be32_to_cpu(cmd);
>           g_assert_cmpint(cmd, ==, CMD_SET_DATAFD);
>           g_assert_cmpint(nfd, ==, 1);
> diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
> index b36a5d972a..b964bb202d 100644
> --- a/tests/unit/test-io-channel-socket.c
> +++ b/tests/unit/test-io-channel-socket.c
> @@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void)
>                              G_N_ELEMENTS(iorecv),
>                              &fdrecv,
>                              &nfdrecv,
> +                           0,
>                              &error_abort);
>   
>       g_assert(nfdrecv == G_N_ELEMENTS(fdsend));
> diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
> index 232984ace6..145eb17c08 100644
> --- a/util/vhost-user-server.c
> +++ b/util/vhost-user-server.c
> @@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
>            * qio_channel_readv_full may have short reads, keeping calling it
>            * until getting VHOST_USER_HDR_SIZE or 0 bytes in total
>            */
> -        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
> +        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err);
>           if (rc < 0) {
>               if (rc == QIO_CHANNEL_ERR_BLOCK) {
>                   assert(local_err == NULL);


Sorry, ignore this series, it was sent by mistake; there is another series tagged with v3.



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 2/2] migration: check magic value for deciding the mapping of channels
  2022-11-19  9:36 ` [PATCH v3 2/2] migration: check magic value for deciding the mapping of channels manish.mishra
@ 2022-11-21 21:59   ` Peter Xu
  2022-11-22  9:01   ` Daniel P. Berrangé
  1 sibling, 0 replies; 24+ messages in thread
From: Peter Xu @ 2022-11-21 21:59 UTC (permalink / raw)
  To: manish.mishra
  Cc: qemu-devel, berrange, prerna.saxena, quintela, dgilbert, lsoaresp

On Sat, Nov 19, 2022 at 09:36:15AM +0000, manish.mishra wrote:
> Current logic assumes that channel connections on the destination side are
> always established in the same order as the source and the first one will
> always be the main channel followed by the multifid or post-copy
> preemption channel. This may not be always true, as even if a channel has a
> connection established on the source side it can be in the pending state on
> the destination side and a newer connection can be established first.
> Basically causing out of order mapping of channels on the destination side.
> Currently, all channels except post-copy preempt send a magic number, this
> patch uses that magic number to decide the type of channel. This logic is
> applicable only for precopy(multifd) live migration, as mentioned, the
> post-copy preempt channel does not send any magic number. Also, tls live
> migrations already does tls handshake before creating other channels, so
> this issue is not possible with tls, hence this logic is avoided for tls
> live migrations. This patch uses read peek to check the magic number of
> channels so that current data/control stream management remains
> un-effected.
> 
> Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> Signed-off-by: manish.mishra <manish.mishra@nutanix.com>

Acked-by: Peter Xu <peterx@redhat.com>

-- 
Peter Xu



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-19  9:36 ` [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel manish.mishra
@ 2022-11-22  9:00   ` Daniel P. Berrangé
  2022-11-22  9:08     ` manish.mishra
  0 siblings, 1 reply; 24+ messages in thread
From: Daniel P. Berrangé @ 2022-11-22  9:00 UTC (permalink / raw)
  To: manish.mishra
  Cc: qemu-devel, peterx, prerna.saxena, quintela, dgilbert, lsoaresp

On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
> MSG_PEEK reads from the peek of channel, The data is treated as
> unread and the next read shall still return this data. This
> support is currently added only for socket class. Extra parameter
> 'flags' is added to io_readv calls to pass extra read flags like
> MSG_PEEK.
> 
> Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
> ---
>  chardev/char-socket.c               |  4 +-
>  include/io/channel.h                | 83 +++++++++++++++++++++++++++++
>  io/channel-buffer.c                 |  1 +
>  io/channel-command.c                |  1 +
>  io/channel-file.c                   |  1 +
>  io/channel-null.c                   |  1 +
>  io/channel-socket.c                 | 16 +++++-
>  io/channel-tls.c                    |  1 +
>  io/channel-websock.c                |  1 +
>  io/channel.c                        | 73 +++++++++++++++++++++++--
>  migration/channel-block.c           |  1 +
>  scsi/qemu-pr-helper.c               |  2 +-
>  tests/qtest/tpm-emu.c               |  2 +-
>  tests/unit/test-io-channel-socket.c |  1 +
>  util/vhost-user-server.c            |  2 +-
>  15 files changed, 179 insertions(+), 11 deletions(-)



> diff --git a/io/channel-socket.c b/io/channel-socket.c
> index b76dca9cc1..a06b24766d 100644
> --- a/io/channel-socket.c
> +++ b/io/channel-socket.c
> @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
>      }
>  #endif /* WIN32 */
>  
> +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
> +

This covers the incoming server-side socket.

This also needs to be set on the outgoing client-side socket in
qio_channel_socket_connect_async.
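
Something along the lines of the sketch below, in the client connect path
(illustrative only; the exact placement is an assumption, not taken from
this patch):

    /* Sketch: mirror the accept() path so outgoing client sockets also
     * advertise peek support. Placement inside the connect path is assumed. */
    qio_channel_set_feature(QIO_CHANNEL(ioc),
                            QIO_CHANNEL_FEATURE_READ_MSG_PEEK);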


> @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>  }
>  #endif /* WIN32 */
>  
> -
>  #ifdef QEMU_MSG_ZEROCOPY
>  static int qio_channel_socket_flush(QIOChannel *ioc,
>                                      Error **errp)

Please remove this unrelated whitespace change.


> @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
>      return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
>  }
>  
> +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> +                                   const struct iovec *iov,
> +                                   size_t niov,
> +                                   Error **errp)
> +{
> +   ssize_t len = 0;
> +   ssize_t total = iov_size(iov, niov);
> +
> +   while (len < total) {
> +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
> +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
> +
> +       if (len == QIO_CHANNEL_ERR_BLOCK) {
> +            if (qemu_in_coroutine()) {
> +                qio_channel_yield(ioc, G_IO_IN);
> +            } else {
> +                qio_channel_wait(ioc, G_IO_IN);
> +            }
> +            continue;
> +       }
> +       if (len == 0) {
> +           return 0;
> +       }
> +       if (len < 0) {
> +           return -1;
> +       }
> +   }

This will busy-wait, burning CPU, whenever a read returns > 0 but < total.


With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 2/2] migration: check magic value for deciding the mapping of channels
  2022-11-19  9:36 ` [PATCH v3 2/2] migration: check magic value for deciding the mapping of channels manish.mishra
  2022-11-21 21:59   ` Peter Xu
@ 2022-11-22  9:01   ` Daniel P. Berrangé
  1 sibling, 0 replies; 24+ messages in thread
From: Daniel P. Berrangé @ 2022-11-22  9:01 UTC (permalink / raw)
  To: manish.mishra
  Cc: qemu-devel, peterx, prerna.saxena, quintela, dgilbert, lsoaresp

On Sat, Nov 19, 2022 at 09:36:15AM +0000, manish.mishra wrote:
> Current logic assumes that channel connections on the destination side are
> always established in the same order as the source and the first one will
> always be the main channel followed by the multifid or post-copy
> preemption channel. This may not be always true, as even if a channel has a
> connection established on the source side it can be in the pending state on
> the destination side and a newer connection can be established first.
> Basically causing out of order mapping of channels on the destination side.
> Currently, all channels except post-copy preempt send a magic number, this
> patch uses that magic number to decide the type of channel. This logic is
> applicable only for precopy(multifd) live migration, as mentioned, the
> post-copy preempt channel does not send any magic number. Also, tls live
> migrations already does tls handshake before creating other channels, so
> this issue is not possible with tls, hence this logic is avoided for tls
> live migrations. This patch uses read peek to check the magic number of
> channels so that current data/control stream management remains
> un-effected.
> 
> Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
> ---
>  migration/migration.c    | 44 +++++++++++++++++++++++++++++-----------
>  migration/multifd.c      | 12 ++++-------
>  migration/multifd.h      |  2 +-
>  migration/postcopy-ram.c |  5 +----
>  migration/postcopy-ram.h |  2 +-
>  5 files changed, 39 insertions(+), 26 deletions(-)

Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22  9:00   ` Daniel P. Berrangé
@ 2022-11-22  9:08     ` manish.mishra
  2022-11-22  9:29       ` Daniel P. Berrangé
  2022-11-22 14:41       ` Peter Xu
  0 siblings, 2 replies; 24+ messages in thread
From: manish.mishra @ 2022-11-22  9:08 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: qemu-devel, peterx, prerna.saxena, quintela, dgilbert, lsoaresp


On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
> On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
>> MSG_PEEK reads from the peek of channel, The data is treated as
>> unread and the next read shall still return this data. This
>> support is currently added only for socket class. Extra parameter
>> 'flags' is added to io_readv calls to pass extra read flags like
>> MSG_PEEK.
>>
>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com
>> Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
>> ---
>>   chardev/char-socket.c               |  4 +-
>>   include/io/channel.h                | 83 +++++++++++++++++++++++++++++
>>   io/channel-buffer.c                 |  1 +
>>   io/channel-command.c                |  1 +
>>   io/channel-file.c                   |  1 +
>>   io/channel-null.c                   |  1 +
>>   io/channel-socket.c                 | 16 +++++-
>>   io/channel-tls.c                    |  1 +
>>   io/channel-websock.c                |  1 +
>>   io/channel.c                        | 73 +++++++++++++++++++++++--
>>   migration/channel-block.c           |  1 +
>>   scsi/qemu-pr-helper.c               |  2 +-
>>   tests/qtest/tpm-emu.c               |  2 +-
>>   tests/unit/test-io-channel-socket.c |  1 +
>>   util/vhost-user-server.c            |  2 +-
>>   15 files changed, 179 insertions(+), 11 deletions(-)
>
>
>> diff --git a/io/channel-socket.c b/io/channel-socket.c
>> index b76dca9cc1..a06b24766d 100644
>> --- a/io/channel-socket.c
>> +++ b/io/channel-socket.c
>> @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
>>       }
>>   #endif /* WIN32 */
>>   
>> +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
>> +
> This covers the incoming server side socket.
>
> This also needs to be set in outgoing client side socket in
> qio_channel_socket_connect_async


Yes, sorry, I considered only the current use-case, but as it is a generic one, both should be there. Thanks, I will update it.

>
>> @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>>   }
>>   #endif /* WIN32 */
>>   
>> -
>>   #ifdef QEMU_MSG_ZEROCOPY
>>   static int qio_channel_socket_flush(QIOChannel *ioc,
>>                                       Error **errp)
> Please remove this unrelated whitespace change.
>
>
>> @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
>>       return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
>>   }
>>   
>> +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
>> +                                   const struct iovec *iov,
>> +                                   size_t niov,
>> +                                   Error **errp)
>> +{
>> +   ssize_t len = 0;
>> +   ssize_t total = iov_size(iov, niov);
>> +
>> +   while (len < total) {
>> +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
>> +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
>> +
>> +       if (len == QIO_CHANNEL_ERR_BLOCK) {
>> +            if (qemu_in_coroutine()) {
>> +                qio_channel_yield(ioc, G_IO_IN);
>> +            } else {
>> +                qio_channel_wait(ioc, G_IO_IN);
>> +            }
>> +            continue;
>> +       }
>> +       if (len == 0) {
>> +           return 0;
>> +       }
>> +       if (len < 0) {
>> +           return -1;
>> +       }
>> +   }
> This will busy wait burning CPU where there is a read > 0 and < total.
>

Daniel, I could use MSG_WAITALL too if that works, but then we would lose the opportunity to yield. Or let me know if you have some other idea.
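
For reference, a minimal sketch of that idea inside qio_channel_socket_readv,
reusing the patch's existing variables (illustrative only):

    /* Sketch of the MSG_WAITALL idea, not part of the patch. Whether
     * MSG_PEEK | MSG_WAITALL is fully reliable here is an assumption to
     * verify, and the call would block the thread instead of yielding
     * back to the event loop while waiting for all bytes. */
    sflags |= MSG_PEEK | MSG_WAITALL;
    ret = recvmsg(sioc->fd, &msg, sflags);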


> With regards,
> Daniel


Thanks

Manish Mishra



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22  9:08     ` manish.mishra
@ 2022-11-22  9:29       ` Daniel P. Berrangé
  2022-11-22  9:40         ` manish.mishra
  2022-11-22 14:41       ` Peter Xu
  1 sibling, 1 reply; 24+ messages in thread
From: Daniel P. Berrangé @ 2022-11-22  9:29 UTC (permalink / raw)
  To: manish.mishra
  Cc: qemu-devel, peterx, prerna.saxena, quintela, dgilbert, lsoaresp

On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
> 
> On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
> > On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
> > > MSG_PEEK reads from the peek of channel, The data is treated as
> > > unread and the next read shall still return this data. This
> > > support is currently added only for socket class. Extra parameter
> > > 'flags' is added to io_readv calls to pass extra read flags like
> > > MSG_PEEK.
> > > 
> > > Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> > > Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
> > > ---
> > >   chardev/char-socket.c               |  4 +-
> > >   include/io/channel.h                | 83 +++++++++++++++++++++++++++++
> > >   io/channel-buffer.c                 |  1 +
> > >   io/channel-command.c                |  1 +
> > >   io/channel-file.c                   |  1 +
> > >   io/channel-null.c                   |  1 +
> > >   io/channel-socket.c                 | 16 +++++-
> > >   io/channel-tls.c                    |  1 +
> > >   io/channel-websock.c                |  1 +
> > >   io/channel.c                        | 73 +++++++++++++++++++++++--
> > >   migration/channel-block.c           |  1 +
> > >   scsi/qemu-pr-helper.c               |  2 +-
> > >   tests/qtest/tpm-emu.c               |  2 +-
> > >   tests/unit/test-io-channel-socket.c |  1 +
> > >   util/vhost-user-server.c            |  2 +-
> > >   15 files changed, 179 insertions(+), 11 deletions(-)
> > 
> > 
> > > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > > index b76dca9cc1..a06b24766d 100644
> > > --- a/io/channel-socket.c
> > > +++ b/io/channel-socket.c
> > > @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
> > >       }
> > >   #endif /* WIN32 */
> > > +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
> > > +
> > This covers the incoming server side socket.
> > 
> > This also needs to be set in outgoing client side socket in
> > qio_channel_socket_connect_async
> 
> 
> Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
> 
> > 
> > > @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> > >   }
> > >   #endif /* WIN32 */
> > > -
> > >   #ifdef QEMU_MSG_ZEROCOPY
> > >   static int qio_channel_socket_flush(QIOChannel *ioc,
> > >                                       Error **errp)
> > Please remove this unrelated whitespace change.
> > 
> > 
> > > @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
> > >       return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
> > >   }
> > > +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> > > +                                   const struct iovec *iov,
> > > +                                   size_t niov,
> > > +                                   Error **errp)
> > > +{
> > > +   ssize_t len = 0;
> > > +   ssize_t total = iov_size(iov, niov);
> > > +
> > > +   while (len < total) {
> > > +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
> > > +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
> > > +
> > > +       if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > +            if (qemu_in_coroutine()) {
> > > +                qio_channel_yield(ioc, G_IO_IN);
> > > +            } else {
> > > +                qio_channel_wait(ioc, G_IO_IN);
> > > +            }
> > > +            continue;
> > > +       }
> > > +       if (len == 0) {
> > > +           return 0;
> > > +       }
> > > +       if (len < 0) {
> > > +           return -1;
> > > +       }
> > > +   }
> > This will busy wait burning CPU where there is a read > 0 and < total.
> > 
> 
> Daniel, i could use MSG_WAITALL too if that works but then we will
> lose opportunity to yield. Or if you have some other idea.

I fear this is an inherent problem with the idea of using PEEK to
look at the magic data.

If we actually read the magic bytes off the wire, then we could have
the same code path for TLS and non-TLS. We would have to modify the
existing later code paths, though, to take account of the fact that
the magic was already read by an earlier codepath.
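
Roughly, the incoming side could look something like the sketch below
(illustrative only; how the later code paths learn that the magic has
already been consumed is left out, and no existing field is implied):

    uint32_t magic;

    /* Consume the magic with a normal full read, which waits or yields as
     * needed, instead of peeking; later code must then not re-read it. */
    if (qio_channel_read_all(ioc, (char *)&magic, sizeof(magic), errp) < 0) {
        return;
    }
    default_channel = (magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));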

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22  9:29       ` Daniel P. Berrangé
@ 2022-11-22  9:40         ` manish.mishra
  2022-11-22  9:53           ` Daniel P. Berrangé
  0 siblings, 1 reply; 24+ messages in thread
From: manish.mishra @ 2022-11-22  9:40 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: qemu-devel, peterx, prerna.saxena, quintela, dgilbert, lsoaresp


On 22/11/22 2:59 pm, Daniel P. Berrangé wrote:
> On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
>> On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
>>> On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
>>>> MSG_PEEK reads from the peek of channel, The data is treated as
>>>> unread and the next read shall still return this data. This
>>>> support is currently added only for socket class. Extra parameter
>>>> 'flags' is added to io_readv calls to pass extra read flags like
>>>> MSG_PEEK.
>>>>
>>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com
>>>> Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
>>>> ---
>>>>    chardev/char-socket.c               |  4 +-
>>>>    include/io/channel.h                | 83 +++++++++++++++++++++++++++++
>>>>    io/channel-buffer.c                 |  1 +
>>>>    io/channel-command.c                |  1 +
>>>>    io/channel-file.c                   |  1 +
>>>>    io/channel-null.c                   |  1 +
>>>>    io/channel-socket.c                 | 16 +++++-
>>>>    io/channel-tls.c                    |  1 +
>>>>    io/channel-websock.c                |  1 +
>>>>    io/channel.c                        | 73 +++++++++++++++++++++++--
>>>>    migration/channel-block.c           |  1 +
>>>>    scsi/qemu-pr-helper.c               |  2 +-
>>>>    tests/qtest/tpm-emu.c               |  2 +-
>>>>    tests/unit/test-io-channel-socket.c |  1 +
>>>>    util/vhost-user-server.c            |  2 +-
>>>>    15 files changed, 179 insertions(+), 11 deletions(-)
>>>
>>>> diff --git a/io/channel-socket.c b/io/channel-socket.c
>>>> index b76dca9cc1..a06b24766d 100644
>>>> --- a/io/channel-socket.c
>>>> +++ b/io/channel-socket.c
>>>> @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
>>>>        }
>>>>    #endif /* WIN32 */
>>>> +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
>>>> +
>>> This covers the incoming server side socket.
>>>
>>> This also needs to be set in outgoing client side socket in
>>> qio_channel_socket_connect_async
>>
>> Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
>>
>>>> @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>>>>    }
>>>>    #endif /* WIN32 */
>>>> -
>>>>    #ifdef QEMU_MSG_ZEROCOPY
>>>>    static int qio_channel_socket_flush(QIOChannel *ioc,
>>>>                                        Error **errp)
>>> Please remove this unrelated whitespace change.
>>>
>>>
>>>> @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
>>>>        return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
>>>>    }
>>>> +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
>>>> +                                   const struct iovec *iov,
>>>> +                                   size_t niov,
>>>> +                                   Error **errp)
>>>> +{
>>>> +   ssize_t len = 0;
>>>> +   ssize_t total = iov_size(iov, niov);
>>>> +
>>>> +   while (len < total) {
>>>> +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
>>>> +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
>>>> +
>>>> +       if (len == QIO_CHANNEL_ERR_BLOCK) {
>>>> +            if (qemu_in_coroutine()) {
>>>> +                qio_channel_yield(ioc, G_IO_IN);
>>>> +            } else {
>>>> +                qio_channel_wait(ioc, G_IO_IN);
>>>> +            }
>>>> +            continue;
>>>> +       }
>>>> +       if (len == 0) {
>>>> +           return 0;
>>>> +       }
>>>> +       if (len < 0) {
>>>> +           return -1;
>>>> +       }
>>>> +   }
>>> This will busy wait burning CPU where there is a read > 0 and < total.
>>>
>> Daniel, i could use MSG_WAITALL too if that works but then we will
>> lose opportunity to yield. Or if you have some other idea.
> I fear this is an inherant problem with the idea of using PEEK to
> look at the magic data.
>
> If we actually read the magic bytes off the wire, then we could have
> the same code path for TLS and non-TLS. We would have to modify the
> existing later code paths though to take account of fact that the
> magic was already read by an earlier codepath.
>
> With regards,
> Daniel


Sure Daniel, I am happy to drop the use of MSG_PEEK, but that way we also have an issue with TLS, for the reason we discussed in v2. Is it okay to send a patch with an actual read-ahead, but not for the TLS case? TLS anyway does not have this bug as it does a handshake.

Thanks

Manish Mishra



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22  9:40         ` manish.mishra
@ 2022-11-22  9:53           ` Daniel P. Berrangé
  2022-11-22 10:13             ` manish.mishra
  0 siblings, 1 reply; 24+ messages in thread
From: Daniel P. Berrangé @ 2022-11-22  9:53 UTC (permalink / raw)
  To: manish.mishra
  Cc: qemu-devel, peterx, prerna.saxena, quintela, dgilbert, lsoaresp

On Tue, Nov 22, 2022 at 03:10:53PM +0530, manish.mishra wrote:
> 
> On 22/11/22 2:59 pm, Daniel P. Berrangé wrote:
> > On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
> > > On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
> > > > On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
> > > > > MSG_PEEK reads from the peek of channel, The data is treated as
> > > > > unread and the next read shall still return this data. This
> > > > > support is currently added only for socket class. Extra parameter
> > > > > 'flags' is added to io_readv calls to pass extra read flags like
> > > > > MSG_PEEK.
> > > > > 
> > > > > Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> > > > > Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
> > > > > ---
> > > > >    chardev/char-socket.c               |  4 +-
> > > > >    include/io/channel.h                | 83 +++++++++++++++++++++++++++++
> > > > >    io/channel-buffer.c                 |  1 +
> > > > >    io/channel-command.c                |  1 +
> > > > >    io/channel-file.c                   |  1 +
> > > > >    io/channel-null.c                   |  1 +
> > > > >    io/channel-socket.c                 | 16 +++++-
> > > > >    io/channel-tls.c                    |  1 +
> > > > >    io/channel-websock.c                |  1 +
> > > > >    io/channel.c                        | 73 +++++++++++++++++++++++--
> > > > >    migration/channel-block.c           |  1 +
> > > > >    scsi/qemu-pr-helper.c               |  2 +-
> > > > >    tests/qtest/tpm-emu.c               |  2 +-
> > > > >    tests/unit/test-io-channel-socket.c |  1 +
> > > > >    util/vhost-user-server.c            |  2 +-
> > > > >    15 files changed, 179 insertions(+), 11 deletions(-)
> > > > 
> > > > > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > > > > index b76dca9cc1..a06b24766d 100644
> > > > > --- a/io/channel-socket.c
> > > > > +++ b/io/channel-socket.c
> > > > > @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
> > > > >        }
> > > > >    #endif /* WIN32 */
> > > > > +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
> > > > > +
> > > > This covers the incoming server side socket.
> > > > 
> > > > This also needs to be set in outgoing client side socket in
> > > > qio_channel_socket_connect_async
> > > 
> > > Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
> > > 
> > > > > @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> > > > >    }
> > > > >    #endif /* WIN32 */
> > > > > -
> > > > >    #ifdef QEMU_MSG_ZEROCOPY
> > > > >    static int qio_channel_socket_flush(QIOChannel *ioc,
> > > > >                                        Error **errp)
> > > > Please remove this unrelated whitespace change.
> > > > 
> > > > 
> > > > > @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
> > > > >        return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
> > > > >    }
> > > > > +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> > > > > +                                   const struct iovec *iov,
> > > > > +                                   size_t niov,
> > > > > +                                   Error **errp)
> > > > > +{
> > > > > +   ssize_t len = 0;
> > > > > +   ssize_t total = iov_size(iov, niov);
> > > > > +
> > > > > +   while (len < total) {
> > > > > +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
> > > > > +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
> > > > > +
> > > > > +       if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > > > +            if (qemu_in_coroutine()) {
> > > > > +                qio_channel_yield(ioc, G_IO_IN);
> > > > > +            } else {
> > > > > +                qio_channel_wait(ioc, G_IO_IN);
> > > > > +            }
> > > > > +            continue;
> > > > > +       }
> > > > > +       if (len == 0) {
> > > > > +           return 0;
> > > > > +       }
> > > > > +       if (len < 0) {
> > > > > +           return -1;
> > > > > +       }
> > > > > +   }
> > > > This will busy wait burning CPU where there is a read > 0 and < total.
> > > > 
> > > Daniel, i could use MSG_WAITALL too if that works but then we will
> > > lose opportunity to yield. Or if you have some other idea.
> > I fear this is an inherant problem with the idea of using PEEK to
> > look at the magic data.
> > 
> > If we actually read the magic bytes off the wire, then we could have
> > the same code path for TLS and non-TLS. We would have to modify the
> > existing later code paths though to take account of fact that the
> > magic was already read by an earlier codepath.
> > 
> > With regards,
> > Daniel
> 
> 
> sure Daniel, I am happy to drop use of MSG_PEEK, but that way also we
> have issue with tls for reason we discussed in V2. Is it okay to send
> a patch with actual read ahead but not for tls case? tls anyway does
> not have this bug as it does handshake.

I've re-read the previous threads, but I don't see what the problem
with TLS is.  We already decided that TLS is not affected by the
race condition. So there should be no problem in reading the magic
bytes early on the TLS channels, while reading the bytes early on
a non-TLS channel will fix the race condition. 

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22  9:53           ` Daniel P. Berrangé
@ 2022-11-22 10:13             ` manish.mishra
  2022-11-22 10:31               ` Daniel P. Berrangé
  0 siblings, 1 reply; 24+ messages in thread
From: manish.mishra @ 2022-11-22 10:13 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: qemu-devel, peterx, prerna.saxena, quintela, dgilbert, lsoaresp


On 22/11/22 3:23 pm, Daniel P. Berrangé wrote:
> On Tue, Nov 22, 2022 at 03:10:53PM +0530, manish.mishra wrote:
>> On 22/11/22 2:59 pm, Daniel P. Berrangé wrote:
>>> On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
>>>> On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
>>>>> On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
>>>>>> MSG_PEEK reads from the peek of channel, The data is treated as
>>>>>> unread and the next read shall still return this data. This
>>>>>> support is currently added only for socket class. Extra parameter
>>>>>> 'flags' is added to io_readv calls to pass extra read flags like
>>>>>> MSG_PEEK.
>>>>>>
>>>>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com
>>>>>> Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
>>>>>> ---
>>>>>>     chardev/char-socket.c               |  4 +-
>>>>>>     include/io/channel.h                | 83 +++++++++++++++++++++++++++++
>>>>>>     io/channel-buffer.c                 |  1 +
>>>>>>     io/channel-command.c                |  1 +
>>>>>>     io/channel-file.c                   |  1 +
>>>>>>     io/channel-null.c                   |  1 +
>>>>>>     io/channel-socket.c                 | 16 +++++-
>>>>>>     io/channel-tls.c                    |  1 +
>>>>>>     io/channel-websock.c                |  1 +
>>>>>>     io/channel.c                        | 73 +++++++++++++++++++++++--
>>>>>>     migration/channel-block.c           |  1 +
>>>>>>     scsi/qemu-pr-helper.c               |  2 +-
>>>>>>     tests/qtest/tpm-emu.c               |  2 +-
>>>>>>     tests/unit/test-io-channel-socket.c |  1 +
>>>>>>     util/vhost-user-server.c            |  2 +-
>>>>>>     15 files changed, 179 insertions(+), 11 deletions(-)
>>>>>> diff --git a/io/channel-socket.c b/io/channel-socket.c
>>>>>> index b76dca9cc1..a06b24766d 100644
>>>>>> --- a/io/channel-socket.c
>>>>>> +++ b/io/channel-socket.c
>>>>>> @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
>>>>>>         }
>>>>>>     #endif /* WIN32 */
>>>>>> +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
>>>>>> +
>>>>> This covers the incoming server side socket.
>>>>>
>>>>> This also needs to be set in outgoing client side socket in
>>>>> qio_channel_socket_connect_async
>>>> Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
>>>>
>>>>>> @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>>>>>>     }
>>>>>>     #endif /* WIN32 */
>>>>>> -
>>>>>>     #ifdef QEMU_MSG_ZEROCOPY
>>>>>>     static int qio_channel_socket_flush(QIOChannel *ioc,
>>>>>>                                         Error **errp)
>>>>> Please remove this unrelated whitespace change.
>>>>>
>>>>>
>>>>>> @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
>>>>>>         return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
>>>>>>     }
>>>>>> +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
>>>>>> +                                   const struct iovec *iov,
>>>>>> +                                   size_t niov,
>>>>>> +                                   Error **errp)
>>>>>> +{
>>>>>> +   ssize_t len = 0;
>>>>>> +   ssize_t total = iov_size(iov, niov);
>>>>>> +
>>>>>> +   while (len < total) {
>>>>>> +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
>>>>>> +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
>>>>>> +
>>>>>> +       if (len == QIO_CHANNEL_ERR_BLOCK) {
>>>>>> +            if (qemu_in_coroutine()) {
>>>>>> +                qio_channel_yield(ioc, G_IO_IN);
>>>>>> +            } else {
>>>>>> +                qio_channel_wait(ioc, G_IO_IN);
>>>>>> +            }
>>>>>> +            continue;
>>>>>> +       }
>>>>>> +       if (len == 0) {
>>>>>> +           return 0;
>>>>>> +       }
>>>>>> +       if (len < 0) {
>>>>>> +           return -1;
>>>>>> +       }
>>>>>> +   }
>>>>> This will busy wait burning CPU where there is a read > 0 and < total.
>>>>>
>>>> Daniel, i could use MSG_WAITALL too if that works but then we will
>>>> lose opportunity to yield. Or if you have some other idea.
>>> I fear this is an inherant problem with the idea of using PEEK to
>>> look at the magic data.
>>>
>>> If we actually read the magic bytes off the wire, then we could have
>>> the same code path for TLS and non-TLS. We would have to modify the
>>> existing later code paths though to take account of fact that the
>>> magic was already read by an earlier codepath.
>>>
>>> With regards,
>>> Daniel
>>
>> sure Daniel, I am happy to drop use of MSG_PEEK, but that way also we
>> have issue with tls for reason we discussed in V2. Is it okay to send
>> a patch with actual read ahead but not for tls case? tls anyway does
>> not have this bug as it does handshake.
> I've re-read the previous threads, but I don't see what the problem
> with TLS is.  We already decided that TLS is not affected by the
> race condition. So there should be no problem in reading the magic
> bytes early on the TLS channels, while reading the bytes early on
> a non-TLS channel will fix the race condition.


Actually, with TLS a channel is only treated as established once its
handshake completes, and on the source side we do the initial qemu_flush
only after all channels are established. But the destination will be stuck
reading the magic on the main channel itself, which never arrives because
the source has not flushed any data, so no new connections (e.g. multiFD)
can be established. So basically the destination cannot accept any new
channel until it has read from the main channel, while the source does not
put any data on the main channel until all channels are established. So if
we read ahead in ioc_process_incoming_channel there is this deadlock with
TLS. The issue is not there in the non-TLS case, because there the source
assumes a connection is established as soon as the connect() call succeeds.


Thanks

Manish Mishra

>
> With regards,
> Daniel


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22 10:13             ` manish.mishra
@ 2022-11-22 10:31               ` Daniel P. Berrangé
  0 siblings, 0 replies; 24+ messages in thread
From: Daniel P. Berrangé @ 2022-11-22 10:31 UTC (permalink / raw)
  To: manish.mishra
  Cc: qemu-devel, peterx, prerna.saxena, quintela, dgilbert, lsoaresp

On Tue, Nov 22, 2022 at 03:43:55PM +0530, manish.mishra wrote:
> 
> On 22/11/22 3:23 pm, Daniel P. Berrangé wrote:
> > On Tue, Nov 22, 2022 at 03:10:53PM +0530, manish.mishra wrote:
> > > On 22/11/22 2:59 pm, Daniel P. Berrangé wrote:
> > > > On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
> > > > > On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
> > > > > > On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
> > > > > > > MSG_PEEK reads from the peek of channel, The data is treated as
> > > > > > > unread and the next read shall still return this data. This
> > > > > > > support is currently added only for socket class. Extra parameter
> > > > > > > 'flags' is added to io_readv calls to pass extra read flags like
> > > > > > > MSG_PEEK.
> > > > > > > 
> > > > > > > Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> > > > > > > Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
> > > > > > > ---
> > > > > > >     chardev/char-socket.c               |  4 +-
> > > > > > >     include/io/channel.h                | 83 +++++++++++++++++++++++++++++
> > > > > > >     io/channel-buffer.c                 |  1 +
> > > > > > >     io/channel-command.c                |  1 +
> > > > > > >     io/channel-file.c                   |  1 +
> > > > > > >     io/channel-null.c                   |  1 +
> > > > > > >     io/channel-socket.c                 | 16 +++++-
> > > > > > >     io/channel-tls.c                    |  1 +
> > > > > > >     io/channel-websock.c                |  1 +
> > > > > > >     io/channel.c                        | 73 +++++++++++++++++++++++--
> > > > > > >     migration/channel-block.c           |  1 +
> > > > > > >     scsi/qemu-pr-helper.c               |  2 +-
> > > > > > >     tests/qtest/tpm-emu.c               |  2 +-
> > > > > > >     tests/unit/test-io-channel-socket.c |  1 +
> > > > > > >     util/vhost-user-server.c            |  2 +-
> > > > > > >     15 files changed, 179 insertions(+), 11 deletions(-)
> > > > > > > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > > > > > > index b76dca9cc1..a06b24766d 100644
> > > > > > > --- a/io/channel-socket.c
> > > > > > > +++ b/io/channel-socket.c
> > > > > > > @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
> > > > > > >         }
> > > > > > >     #endif /* WIN32 */
> > > > > > > +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
> > > > > > > +
> > > > > > This covers the incoming server side socket.
> > > > > > 
> > > > > > This also needs to be set in outgoing client side socket in
> > > > > > qio_channel_socket_connect_async
> > > > > Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
> > > > > 
> > > > > > > @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> > > > > > >     }
> > > > > > >     #endif /* WIN32 */
> > > > > > > -
> > > > > > >     #ifdef QEMU_MSG_ZEROCOPY
> > > > > > >     static int qio_channel_socket_flush(QIOChannel *ioc,
> > > > > > >                                         Error **errp)
> > > > > > Please remove this unrelated whitespace change.
> > > > > > 
> > > > > > 
> > > > > > > @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
> > > > > > >         return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
> > > > > > >     }
> > > > > > > +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> > > > > > > +                                   const struct iovec *iov,
> > > > > > > +                                   size_t niov,
> > > > > > > +                                   Error **errp)
> > > > > > > +{
> > > > > > > +   ssize_t len = 0;
> > > > > > > +   ssize_t total = iov_size(iov, niov);
> > > > > > > +
> > > > > > > +   while (len < total) {
> > > > > > > +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
> > > > > > > +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
> > > > > > > +
> > > > > > > +       if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > > > > > +            if (qemu_in_coroutine()) {
> > > > > > > +                qio_channel_yield(ioc, G_IO_IN);
> > > > > > > +            } else {
> > > > > > > +                qio_channel_wait(ioc, G_IO_IN);
> > > > > > > +            }
> > > > > > > +            continue;
> > > > > > > +       }
> > > > > > > +       if (len == 0) {
> > > > > > > +           return 0;
> > > > > > > +       }
> > > > > > > +       if (len < 0) {
> > > > > > > +           return -1;
> > > > > > > +       }
> > > > > > > +   }
> > > > > > This will busy wait burning CPU where there is a read > 0 and < total.
> > > > > > 
> > > > > Daniel, i could use MSG_WAITALL too if that works but then we will
> > > > > lose opportunity to yield. Or if you have some other idea.
> > > > I fear this is an inherant problem with the idea of using PEEK to
> > > > look at the magic data.
> > > > 
> > > > If we actually read the magic bytes off the wire, then we could have
> > > > the same code path for TLS and non-TLS. We would have to modify the
> > > > existing later code paths though to take account of fact that the
> > > > magic was already read by an earlier codepath.
> > > > 
> > > > With regards,
> > > > Daniel
> > > 
> > > sure Daniel, I am happy to drop use of MSG_PEEK, but that way also we
> > > have issue with tls for reason we discussed in V2. Is it okay to send
> > > a patch with actual read ahead but not for tls case? tls anyway does
> > > not have this bug as it does handshake.
> > I've re-read the previous threads, but I don't see what the problem
> > with TLS is.  We already decided that TLS is not affected by the
> > race condition. So there should be no problem in reading the magic
> > bytes early on the TLS channels, while reading the bytes early on
> > a non-TLS channel will fix the race condition.
> 
> 
> Actually, with TLS a channel is only treated as established once its
> handshake completes, and on the source side we do the initial qemu_flush
> only after all channels are established. But the destination will be stuck
> reading the magic on the main channel itself, which never arrives because
> the source has not flushed any data, so no new connections (e.g. multiFD)
> can be established. So basically the destination cannot accept any new
> channel until it has read from the main channel, while the source does not
> put any data on the main channel until all channels are established. So if
> we read ahead in ioc_process_incoming_channel there is this deadlock with
> TLS. The issue is not there in the non-TLS case, because there the source
> assumes a connection is established as soon as the connect() call succeeds.

Ah yes, I forgot about the 'flush' problem. Reading the magic in the
non-TLS case is OK then, I guess.

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22  9:08     ` manish.mishra
  2022-11-22  9:29       ` Daniel P. Berrangé
@ 2022-11-22 14:41       ` Peter Xu
  2022-11-22 14:49         ` Daniel P. Berrangé
  1 sibling, 1 reply; 24+ messages in thread
From: Peter Xu @ 2022-11-22 14:41 UTC (permalink / raw)
  To: manish.mishra
  Cc: Daniel P. Berrangé,
	qemu-devel, prerna.saxena, quintela, dgilbert, lsoaresp

On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
> 
> On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
> > On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
> > > MSG_PEEK reads from the peek of channel, The data is treated as
> > > unread and the next read shall still return this data. This
> > > support is currently added only for socket class. Extra parameter
> > > 'flags' is added to io_readv calls to pass extra read flags like
> > > MSG_PEEK.
> > > 
> > > Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> > > Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
> > > ---
> > >   chardev/char-socket.c               |  4 +-
> > >   include/io/channel.h                | 83 +++++++++++++++++++++++++++++
> > >   io/channel-buffer.c                 |  1 +
> > >   io/channel-command.c                |  1 +
> > >   io/channel-file.c                   |  1 +
> > >   io/channel-null.c                   |  1 +
> > >   io/channel-socket.c                 | 16 +++++-
> > >   io/channel-tls.c                    |  1 +
> > >   io/channel-websock.c                |  1 +
> > >   io/channel.c                        | 73 +++++++++++++++++++++++--
> > >   migration/channel-block.c           |  1 +
> > >   scsi/qemu-pr-helper.c               |  2 +-
> > >   tests/qtest/tpm-emu.c               |  2 +-
> > >   tests/unit/test-io-channel-socket.c |  1 +
> > >   util/vhost-user-server.c            |  2 +-
> > >   15 files changed, 179 insertions(+), 11 deletions(-)
> > 
> > 
> > > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > > index b76dca9cc1..a06b24766d 100644
> > > --- a/io/channel-socket.c
> > > +++ b/io/channel-socket.c
> > > @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
> > >       }
> > >   #endif /* WIN32 */
> > > +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
> > > +
> > This covers the incoming server side socket.
> > 
> > This also needs to be set in outgoing client side socket in
> > qio_channel_socket_connect_async
> 
> 
> Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
> 
> > 
> > > @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> > >   }
> > >   #endif /* WIN32 */
> > > -
> > >   #ifdef QEMU_MSG_ZEROCOPY
> > >   static int qio_channel_socket_flush(QIOChannel *ioc,
> > >                                       Error **errp)
> > Please remove this unrelated whitespace change.
> > 
> > 
> > > @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
> > >       return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
> > >   }
> > > +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> > > +                                   const struct iovec *iov,
> > > +                                   size_t niov,
> > > +                                   Error **errp)
> > > +{
> > > +   ssize_t len = 0;
> > > +   ssize_t total = iov_size(iov, niov);
> > > +
> > > +   while (len < total) {
> > > +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
> > > +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
> > > +
> > > +       if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > +            if (qemu_in_coroutine()) {
> > > +                qio_channel_yield(ioc, G_IO_IN);
> > > +            } else {
> > > +                qio_channel_wait(ioc, G_IO_IN);
> > > +            }
> > > +            continue;
> > > +       }
> > > +       if (len == 0) {
> > > +           return 0;
> > > +       }
> > > +       if (len < 0) {
> > > +           return -1;
> > > +       }
> > > +   }
> > This will busy wait burning CPU where there is a read > 0 and < total.
> > 
> 
> Daniel, i could use MSG_WAITALL too if that works but then we will lose opportunity to yield. Or if you have some other idea.

How easy would this happen?

Another alternative is we could just return the partial len to caller then
we fallback to the original channel orders if it happens.  And then if it
mostly will never happen it'll behave merely the same as what we want.

The thing is, the other approach will be hacky in another way (having a
migration_consumed_4_bytes_header flag for both the main and multifd
channels), so if it solves 99.99% of the cases I'd think it's good enough.
Anyway, we're already working on a corner case on an unreliable network,
and even if the failure is triggered it's not so bad - we just redo the
migration.

-- 
Peter Xu



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22 14:41       ` Peter Xu
@ 2022-11-22 14:49         ` Daniel P. Berrangé
  2022-11-22 15:31           ` manish.mishra
  0 siblings, 1 reply; 24+ messages in thread
From: Daniel P. Berrangé @ 2022-11-22 14:49 UTC (permalink / raw)
  To: Peter Xu
  Cc: manish.mishra, qemu-devel, prerna.saxena, quintela, dgilbert, lsoaresp

On Tue, Nov 22, 2022 at 09:41:01AM -0500, Peter Xu wrote:
> On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
> > 
> > On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
> > > On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
> > > > MSG_PEEK reads from the peek of channel, The data is treated as
> > > > unread and the next read shall still return this data. This
> > > > support is currently added only for socket class. Extra parameter
> > > > 'flags' is added to io_readv calls to pass extra read flags like
> > > > MSG_PEEK.
> > > > 
> > > > Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> > > > Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
> > > > ---
> > > >   chardev/char-socket.c               |  4 +-
> > > >   include/io/channel.h                | 83 +++++++++++++++++++++++++++++
> > > >   io/channel-buffer.c                 |  1 +
> > > >   io/channel-command.c                |  1 +
> > > >   io/channel-file.c                   |  1 +
> > > >   io/channel-null.c                   |  1 +
> > > >   io/channel-socket.c                 | 16 +++++-
> > > >   io/channel-tls.c                    |  1 +
> > > >   io/channel-websock.c                |  1 +
> > > >   io/channel.c                        | 73 +++++++++++++++++++++++--
> > > >   migration/channel-block.c           |  1 +
> > > >   scsi/qemu-pr-helper.c               |  2 +-
> > > >   tests/qtest/tpm-emu.c               |  2 +-
> > > >   tests/unit/test-io-channel-socket.c |  1 +
> > > >   util/vhost-user-server.c            |  2 +-
> > > >   15 files changed, 179 insertions(+), 11 deletions(-)
> > > 
> > > 
> > > > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > > > index b76dca9cc1..a06b24766d 100644
> > > > --- a/io/channel-socket.c
> > > > +++ b/io/channel-socket.c
> > > > @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
> > > >       }
> > > >   #endif /* WIN32 */
> > > > +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
> > > > +
> > > This covers the incoming server side socket.
> > > 
> > > This also needs to be set in outgoing client side socket in
> > > qio_channel_socket_connect_async
> > 
> > 
> > Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
> > 
> > > 
> > > > @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> > > >   }
> > > >   #endif /* WIN32 */
> > > > -
> > > >   #ifdef QEMU_MSG_ZEROCOPY
> > > >   static int qio_channel_socket_flush(QIOChannel *ioc,
> > > >                                       Error **errp)
> > > Please remove this unrelated whitespace change.
> > > 
> > > 
> > > > @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
> > > >       return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
> > > >   }
> > > > +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> > > > +                                   const struct iovec *iov,
> > > > +                                   size_t niov,
> > > > +                                   Error **errp)
> > > > +{
> > > > +   ssize_t len = 0;
> > > > +   ssize_t total = iov_size(iov, niov);
> > > > +
> > > > +   while (len < total) {
> > > > +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
> > > > +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
> > > > +
> > > > +       if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > > +            if (qemu_in_coroutine()) {
> > > > +                qio_channel_yield(ioc, G_IO_IN);
> > > > +            } else {
> > > > +                qio_channel_wait(ioc, G_IO_IN);
> > > > +            }
> > > > +            continue;
> > > > +       }
> > > > +       if (len == 0) {
> > > > +           return 0;
> > > > +       }
> > > > +       if (len < 0) {
> > > > +           return -1;
> > > > +       }
> > > > +   }
> > > This will busy wait burning CPU where there is a read > 0 and < total.
> > > 
> > 
> > Daniel, i could use MSG_WAITALL too if that works but then we will lose opportunity to yield. Or if you have some other idea.
> 
> How easy would this happen?
> 
> Another alternative is we could just return the partial len to caller then
> we fallback to the original channel orders if it happens.  And then if it
> mostly will never happen it'll behave merely the same as what we want.

Well we're trying to deal with a bug where the slow and/or unreliable
network causes channels to arrive in unexpected order. Given we know
we're having network trouble, I wouldn't want to make more assumptions
about things happening correctly.


With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22 14:49         ` Daniel P. Berrangé
@ 2022-11-22 15:31           ` manish.mishra
  2022-11-22 16:10             ` Peter Xu
  0 siblings, 1 reply; 24+ messages in thread
From: manish.mishra @ 2022-11-22 15:31 UTC (permalink / raw)
  To: Daniel P. Berrangé, Peter Xu
  Cc: qemu-devel, prerna.saxena, quintela, dgilbert, lsoaresp



On 22/11/22 8:19 pm, Daniel P. Berrangé wrote:
> On Tue, Nov 22, 2022 at 09:41:01AM -0500, Peter Xu wrote:
>> On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
>>> On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
>>>> On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
>>>>> MSG_PEEK reads from the peek of channel, The data is treated as
>>>>> unread and the next read shall still return this data. This
>>>>> support is currently added only for socket class. Extra parameter
>>>>> 'flags' is added to io_readv calls to pass extra read flags like
>>>>> MSG_PEEK.
>>>>>
>>>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com
>>>>> Signed-off-by: manish.mishra<manish.mishra@nutanix.com>
>>>>> ---
>>>>>    chardev/char-socket.c               |  4 +-
>>>>>    include/io/channel.h                | 83 +++++++++++++++++++++++++++++
>>>>>    io/channel-buffer.c                 |  1 +
>>>>>    io/channel-command.c                |  1 +
>>>>>    io/channel-file.c                   |  1 +
>>>>>    io/channel-null.c                   |  1 +
>>>>>    io/channel-socket.c                 | 16 +++++-
>>>>>    io/channel-tls.c                    |  1 +
>>>>>    io/channel-websock.c                |  1 +
>>>>>    io/channel.c                        | 73 +++++++++++++++++++++++--
>>>>>    migration/channel-block.c           |  1 +
>>>>>    scsi/qemu-pr-helper.c               |  2 +-
>>>>>    tests/qtest/tpm-emu.c               |  2 +-
>>>>>    tests/unit/test-io-channel-socket.c |  1 +
>>>>>    util/vhost-user-server.c            |  2 +-
>>>>>    15 files changed, 179 insertions(+), 11 deletions(-)
>>>>
>>>>> diff --git a/io/channel-socket.c b/io/channel-socket.c
>>>>> index b76dca9cc1..a06b24766d 100644
>>>>> --- a/io/channel-socket.c
>>>>> +++ b/io/channel-socket.c
>>>>> @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
>>>>>        }
>>>>>    #endif /* WIN32 */
>>>>> +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
>>>>> +
>>>> This covers the incoming server side socket.
>>>>
>>>> This also needs to be set in outgoing client side socket in
>>>> qio_channel_socket_connect_async
>>>
>>> Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
>>>
>>>>> @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>>>>>    }
>>>>>    #endif /* WIN32 */
>>>>> -
>>>>>    #ifdef QEMU_MSG_ZEROCOPY
>>>>>    static int qio_channel_socket_flush(QIOChannel *ioc,
>>>>>                                        Error **errp)
>>>> Please remove this unrelated whitespace change.
>>>>
>>>>
>>>>> @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
>>>>>        return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
>>>>>    }
>>>>> +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
>>>>> +                                   const struct iovec *iov,
>>>>> +                                   size_t niov,
>>>>> +                                   Error **errp)
>>>>> +{
>>>>> +   ssize_t len = 0;
>>>>> +   ssize_t total = iov_size(iov, niov);
>>>>> +
>>>>> +   while (len < total) {
>>>>> +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
>>>>> +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
>>>>> +
>>>>> +       if (len == QIO_CHANNEL_ERR_BLOCK) {
>>>>> +            if (qemu_in_coroutine()) {
>>>>> +                qio_channel_yield(ioc, G_IO_IN);
>>>>> +            } else {
>>>>> +                qio_channel_wait(ioc, G_IO_IN);
>>>>> +            }
>>>>> +            continue;
>>>>> +       }
>>>>> +       if (len == 0) {
>>>>> +           return 0;
>>>>> +       }
>>>>> +       if (len < 0) {
>>>>> +           return -1;
>>>>> +       }
>>>>> +   }
>>>> This will busy wait burning CPU where there is a read > 0 and < total.
>>>>
>>> Daniel, i could use MSG_WAITALL too if that works but then we will lose opportunity to yield. Or if you have some other idea.
>> How easy would this happen?
>>
>> Another alternative is we could just return the partial len to caller then
>> we fallback to the original channel orders if it happens.  And then if it
>> mostly will never happen it'll behave merely the same as what we want.
> Well we're trying to deal with a bug where the slow and/or unreliable
> network causes channels to arrive in unexpected order. Given we know
> we're having network trouble, I wouldn't want to make more assumptions
> about things happening correctly.
>
>
> With regards,
> Daniel


Peter, I have seen MSG_PEEK used in combination with MSG_WAITALL, but it
looks like, even though the chances are low, it can still return partial
data even with multiple retries in the signal case, so it is not foolproof
(a rough sketch of the retry pattern follows the excerpt below).

MSG_WAITALL (since Linux 2.2)
               This flag requests that the operation block until the full
               request is satisfied.  However, the call may still return
               less data than requested if a signal is caught, an error
               or disconnect occurs, or the next data to be received is
               of a different type than that returned.  This flag has no
               effect for datagram sockets.
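
To make the partial-return point concrete, here is a minimal sketch (plain
POSIX sockets, not QEMU code; the names are just placeholders) of peeking a
fixed-size magic header with MSG_PEEK | MSG_WAITALL on a blocking socket
and retrying when a short result comes back:

#include <errno.h>
#include <stdint.h>
#include <sys/socket.h>

/* Peek 'want' bytes without consuming them.  Returns 1 when the full
 * header can be peeked, 0 on EOF, -1 on error. */
static int peek_magic(int fd, uint8_t *buf, size_t want)
{
    size_t got = 0;

    while (got < want) {
        /* MSG_PEEK leaves the data queued; each call restarts from the
         * head of the receive buffer, so we re-read into buf from 0. */
        ssize_t n = recv(fd, buf, want, MSG_PEEK | MSG_WAITALL);

        if (n < 0) {
            if (errno == EINTR) {
                continue;   /* interrupted by a signal, just retry */
            }
            return -1;      /* real error */
        }
        if (n == 0) {
            return 0;       /* peer closed the connection */
        }
        got = n;            /* may still be short, per the man page */
    }
    return 1;
}

Even with MSG_WAITALL the short-return cases above are still possible, which
is why the retry loop (or an explicit fallback in the caller) is still needed.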

An actual read-ahead will be a little hackish, so just confirming that we
are all in agreement to do the actual read-ahead, and then I can send a
patch? :)

Thanks
Manish Mishra


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22 15:31           ` manish.mishra
@ 2022-11-22 16:10             ` Peter Xu
  2022-11-22 16:29               ` Peter Xu
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Xu @ 2022-11-22 16:10 UTC (permalink / raw)
  To: manish.mishra
  Cc: Daniel P. Berrangé,
	qemu-devel, prerna.saxena, quintela, dgilbert, lsoaresp

On Tue, Nov 22, 2022 at 09:01:59PM +0530, manish.mishra wrote:
> 
> On 22/11/22 8:19 pm, Daniel P. Berrangé wrote:
> > On Tue, Nov 22, 2022 at 09:41:01AM -0500, Peter Xu wrote:
> > > On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
> > > > On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
> > > > > On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
> > > > > > MSG_PEEK reads from the peek of channel, The data is treated as
> > > > > > unread and the next read shall still return this data. This
> > > > > > support is currently added only for socket class. Extra parameter
> > > > > > 'flags' is added to io_readv calls to pass extra read flags like
> > > > > > MSG_PEEK.
> > > > > > 
> > > > > > Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> > > > > > Signed-off-by: manish.mishra<manish.mishra@nutanix.com>
> > > > > > ---
> > > > > >    chardev/char-socket.c               |  4 +-
> > > > > >    include/io/channel.h                | 83 +++++++++++++++++++++++++++++
> > > > > >    io/channel-buffer.c                 |  1 +
> > > > > >    io/channel-command.c                |  1 +
> > > > > >    io/channel-file.c                   |  1 +
> > > > > >    io/channel-null.c                   |  1 +
> > > > > >    io/channel-socket.c                 | 16 +++++-
> > > > > >    io/channel-tls.c                    |  1 +
> > > > > >    io/channel-websock.c                |  1 +
> > > > > >    io/channel.c                        | 73 +++++++++++++++++++++++--
> > > > > >    migration/channel-block.c           |  1 +
> > > > > >    scsi/qemu-pr-helper.c               |  2 +-
> > > > > >    tests/qtest/tpm-emu.c               |  2 +-
> > > > > >    tests/unit/test-io-channel-socket.c |  1 +
> > > > > >    util/vhost-user-server.c            |  2 +-
> > > > > >    15 files changed, 179 insertions(+), 11 deletions(-)
> > > > > 
> > > > > > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > > > > > index b76dca9cc1..a06b24766d 100644
> > > > > > --- a/io/channel-socket.c
> > > > > > +++ b/io/channel-socket.c
> > > > > > @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
> > > > > >        }
> > > > > >    #endif /* WIN32 */
> > > > > > +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
> > > > > > +
> > > > > This covers the incoming server side socket.
> > > > > 
> > > > > This also needs to be set in outgoing client side socket in
> > > > > qio_channel_socket_connect_async
> > > > 
> > > > Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
> > > > 
> > > > > > @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> > > > > >    }
> > > > > >    #endif /* WIN32 */
> > > > > > -
> > > > > >    #ifdef QEMU_MSG_ZEROCOPY
> > > > > >    static int qio_channel_socket_flush(QIOChannel *ioc,
> > > > > >                                        Error **errp)
> > > > > Please remove this unrelated whitespace change.
> > > > > 
> > > > > 
> > > > > > @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
> > > > > >        return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
> > > > > >    }
> > > > > > +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> > > > > > +                                   const struct iovec *iov,
> > > > > > +                                   size_t niov,
> > > > > > +                                   Error **errp)
> > > > > > +{
> > > > > > +   ssize_t len = 0;
> > > > > > +   ssize_t total = iov_size(iov, niov);
> > > > > > +
> > > > > > +   while (len < total) {
> > > > > > +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
> > > > > > +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
> > > > > > +
> > > > > > +       if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > > > > +            if (qemu_in_coroutine()) {
> > > > > > +                qio_channel_yield(ioc, G_IO_IN);
> > > > > > +            } else {
> > > > > > +                qio_channel_wait(ioc, G_IO_IN);
> > > > > > +            }
> > > > > > +            continue;
> > > > > > +       }
> > > > > > +       if (len == 0) {
> > > > > > +           return 0;
> > > > > > +       }
> > > > > > +       if (len < 0) {
> > > > > > +           return -1;
> > > > > > +       }
> > > > > > +   }
> > > > > This will busy wait burning CPU where there is a read > 0 and < total.
> > > > > 
> > > > Daniel, i could use MSG_WAITALL too if that works but then we will lose opportunity to yield. Or if you have some other idea.
> > > How easy would this happen?
> > > 
> > > Another alternative is we could just return the partial len to caller then
> > > we fallback to the original channel orders if it happens.  And then if it
> > > mostly will never happen it'll behave merely the same as what we want.
> > Well we're trying to deal with a bug where the slow and/or unreliable
> > network causes channels to arrive in unexpected order. Given we know
> > we're having network trouble, I wouldn't want to make more assumptions
> > about things happening correctly.
> > 
> > 
> > With regards,
> > Daniel
> 
> 
> Peter, I have seen MSG_PEEK used in combination with MSG_WAITALL, but looks like even though chances are less it can still return partial data even with multiple retries for signal case, so is not full proof.
> 
> *MSG_WAITALL *(since Linux 2.2)
>               This flag requests that the operation block until the full
>               request is satisfied.  However, the call may still return
>               less data than requested if a signal is caught, an error
>               or disconnect occurs, or the next data to be received is
>               of a different type than that returned.  This flag has no
>               effect for datagram sockets.
> 
> Actual read ahead will be little hackish, so just confirming we all are in agreement to do actual read ahead and i can send patch? :)

Yet another option is that the caller handles a partial PEEK and then we
sleep in the migration code before another PEEK attempt, until it reaches
the full length.

Even with that explicit sleep code, IMHO it is cleaner than the read-header
flag plus things like a !tls check just to avoid the handshake deadlock
itself (and if we go with this route we'd better also fully document why
!tls, i.e. how the deadlock can happen).

Would that work?

-- 
Peter Xu



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22 16:10             ` Peter Xu
@ 2022-11-22 16:29               ` Peter Xu
  2022-11-22 16:33                 ` Peter Xu
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Xu @ 2022-11-22 16:29 UTC (permalink / raw)
  To: manish.mishra
  Cc: Daniel P. Berrangé,
	qemu-devel, prerna.saxena, quintela, dgilbert, lsoaresp

On Tue, Nov 22, 2022 at 11:10:18AM -0500, Peter Xu wrote:
> On Tue, Nov 22, 2022 at 09:01:59PM +0530, manish.mishra wrote:
> > 
> > On 22/11/22 8:19 pm, Daniel P. Berrangé wrote:
> > > On Tue, Nov 22, 2022 at 09:41:01AM -0500, Peter Xu wrote:
> > > > On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
> > > > > On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
> > > > > > On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
> > > > > > > MSG_PEEK reads from the peek of channel, The data is treated as
> > > > > > > unread and the next read shall still return this data. This
> > > > > > > support is currently added only for socket class. Extra parameter
> > > > > > > 'flags' is added to io_readv calls to pass extra read flags like
> > > > > > > MSG_PEEK.
> > > > > > > 
> > > > > > > Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> > > > > > > Signed-off-by: manish.mishra<manish.mishra@nutanix.com>
> > > > > > > ---
> > > > > > >    chardev/char-socket.c               |  4 +-
> > > > > > >    include/io/channel.h                | 83 +++++++++++++++++++++++++++++
> > > > > > >    io/channel-buffer.c                 |  1 +
> > > > > > >    io/channel-command.c                |  1 +
> > > > > > >    io/channel-file.c                   |  1 +
> > > > > > >    io/channel-null.c                   |  1 +
> > > > > > >    io/channel-socket.c                 | 16 +++++-
> > > > > > >    io/channel-tls.c                    |  1 +
> > > > > > >    io/channel-websock.c                |  1 +
> > > > > > >    io/channel.c                        | 73 +++++++++++++++++++++++--
> > > > > > >    migration/channel-block.c           |  1 +
> > > > > > >    scsi/qemu-pr-helper.c               |  2 +-
> > > > > > >    tests/qtest/tpm-emu.c               |  2 +-
> > > > > > >    tests/unit/test-io-channel-socket.c |  1 +
> > > > > > >    util/vhost-user-server.c            |  2 +-
> > > > > > >    15 files changed, 179 insertions(+), 11 deletions(-)
> > > > > > 
> > > > > > > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > > > > > > index b76dca9cc1..a06b24766d 100644
> > > > > > > --- a/io/channel-socket.c
> > > > > > > +++ b/io/channel-socket.c
> > > > > > > @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
> > > > > > >        }
> > > > > > >    #endif /* WIN32 */
> > > > > > > +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
> > > > > > > +
> > > > > > This covers the incoming server side socket.
> > > > > > 
> > > > > > This also needs to be set in outgoing client side socket in
> > > > > > qio_channel_socket_connect_async
> > > > > 
> > > > > Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
> > > > > 
> > > > > > > @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> > > > > > >    }
> > > > > > >    #endif /* WIN32 */
> > > > > > > -
> > > > > > >    #ifdef QEMU_MSG_ZEROCOPY
> > > > > > >    static int qio_channel_socket_flush(QIOChannel *ioc,
> > > > > > >                                        Error **errp)
> > > > > > Please remove this unrelated whitespace change.
> > > > > > 
> > > > > > 
> > > > > > > @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
> > > > > > >        return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
> > > > > > >    }
> > > > > > > +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> > > > > > > +                                   const struct iovec *iov,
> > > > > > > +                                   size_t niov,
> > > > > > > +                                   Error **errp)
> > > > > > > +{
> > > > > > > +   ssize_t len = 0;
> > > > > > > +   ssize_t total = iov_size(iov, niov);
> > > > > > > +
> > > > > > > +   while (len < total) {
> > > > > > > +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
> > > > > > > +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
> > > > > > > +
> > > > > > > +       if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > > > > > +            if (qemu_in_coroutine()) {
> > > > > > > +                qio_channel_yield(ioc, G_IO_IN);
> > > > > > > +            } else {
> > > > > > > +                qio_channel_wait(ioc, G_IO_IN);
> > > > > > > +            }
> > > > > > > +            continue;
> > > > > > > +       }
> > > > > > > +       if (len == 0) {
> > > > > > > +           return 0;
> > > > > > > +       }
> > > > > > > +       if (len < 0) {
> > > > > > > +           return -1;
> > > > > > > +       }
> > > > > > > +   }
> > > > > > This will busy wait burning CPU where there is a read > 0 and < total.
> > > > > > 
> > > > > Daniel, i could use MSG_WAITALL too if that works but then we will lose opportunity to yield. Or if you have some other idea.
> > > > How easy would this happen?
> > > > 
> > > > Another alternative is we could just return the partial len to caller then
> > > > we fallback to the original channel orders if it happens.  And then if it
> > > > mostly will never happen it'll behave merely the same as what we want.
> > > Well we're trying to deal with a bug where the slow and/or unreliable
> > > network causes channels to arrive in unexpected order. Given we know
> > > we're having network trouble, I wouldn't want to make more assumptions
> > > about things happening correctly.
> > > 
> > > 
> > > With regards,
> > > Daniel
> > 
> > 
> > Peter, I have seen MSG_PEEK used in combination with MSG_WAITALL, but looks like even though chances are less it can still return partial data even with multiple retries for signal case, so is not full proof.
> > 
> > *MSG_WAITALL *(since Linux 2.2)
> >               This flag requests that the operation block until the full
> >               request is satisfied.  However, the call may still return
> >               less data than requested if a signal is caught, an error
> >               or disconnect occurs, or the next data to be received is
> >               of a different type than that returned.  This flag has no
> >               effect for datagram sockets.
> > 
> > Actual read ahead will be little hackish, so just confirming we all are in agreement to do actual read ahead and i can send patch? :)
> 
> Yet another option is the caller handles partial PEEK and then we can sleep
> in the migration code before another PEEK attempt until it reaches the full
> length.
> 
> Even with that explicit sleep code IMHO it is cleaner than the read-header
> flag plus things like !tls check just to avoid the handshake dead lock
> itself (and if to go with this route we'd better also have a full document
> on why !tls, aka, how the dead lock can happen).

Nah, I forgot we're in the same condition as in the main thread... sorry.

Then how about using qemu_co_sleep_ns_wakeable() to replace
qio_channel_yield() either above, or in the caller?

-- 
Peter Xu



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22 16:29               ` Peter Xu
@ 2022-11-22 16:33                 ` Peter Xu
  2022-11-22 16:42                   ` manish.mishra
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Xu @ 2022-11-22 16:33 UTC (permalink / raw)
  To: manish.mishra
  Cc: Daniel P. Berrangé,
	qemu-devel, prerna.saxena, quintela, dgilbert, lsoaresp

On Tue, Nov 22, 2022 at 11:29:05AM -0500, Peter Xu wrote:
> On Tue, Nov 22, 2022 at 11:10:18AM -0500, Peter Xu wrote:
> > On Tue, Nov 22, 2022 at 09:01:59PM +0530, manish.mishra wrote:
> > > 
> > > On 22/11/22 8:19 pm, Daniel P. Berrangé wrote:
> > > > On Tue, Nov 22, 2022 at 09:41:01AM -0500, Peter Xu wrote:
> > > > > On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
> > > > > > On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
> > > > > > > On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
> > > > > > > > MSG_PEEK reads from the peek of channel, The data is treated as
> > > > > > > > unread and the next read shall still return this data. This
> > > > > > > > support is currently added only for socket class. Extra parameter
> > > > > > > > 'flags' is added to io_readv calls to pass extra read flags like
> > > > > > > > MSG_PEEK.
> > > > > > > > 
> > > > > > > > Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> > > > > > > > Signed-off-by: manish.mishra<manish.mishra@nutanix.com>
> > > > > > > > ---
> > > > > > > >    chardev/char-socket.c               |  4 +-
> > > > > > > >    include/io/channel.h                | 83 +++++++++++++++++++++++++++++
> > > > > > > >    io/channel-buffer.c                 |  1 +
> > > > > > > >    io/channel-command.c                |  1 +
> > > > > > > >    io/channel-file.c                   |  1 +
> > > > > > > >    io/channel-null.c                   |  1 +
> > > > > > > >    io/channel-socket.c                 | 16 +++++-
> > > > > > > >    io/channel-tls.c                    |  1 +
> > > > > > > >    io/channel-websock.c                |  1 +
> > > > > > > >    io/channel.c                        | 73 +++++++++++++++++++++++--
> > > > > > > >    migration/channel-block.c           |  1 +
> > > > > > > >    scsi/qemu-pr-helper.c               |  2 +-
> > > > > > > >    tests/qtest/tpm-emu.c               |  2 +-
> > > > > > > >    tests/unit/test-io-channel-socket.c |  1 +
> > > > > > > >    util/vhost-user-server.c            |  2 +-
> > > > > > > >    15 files changed, 179 insertions(+), 11 deletions(-)
> > > > > > > 
> > > > > > > > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > > > > > > > index b76dca9cc1..a06b24766d 100644
> > > > > > > > --- a/io/channel-socket.c
> > > > > > > > +++ b/io/channel-socket.c
> > > > > > > > @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
> > > > > > > >        }
> > > > > > > >    #endif /* WIN32 */
> > > > > > > > +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
> > > > > > > > +
> > > > > > > This covers the incoming server side socket.
> > > > > > > 
> > > > > > > This also needs to be set in outgoing client side socket in
> > > > > > > qio_channel_socket_connect_async
> > > > > > 
> > > > > > Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
> > > > > > 
> > > > > > > > @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> > > > > > > >    }
> > > > > > > >    #endif /* WIN32 */
> > > > > > > > -
> > > > > > > >    #ifdef QEMU_MSG_ZEROCOPY
> > > > > > > >    static int qio_channel_socket_flush(QIOChannel *ioc,
> > > > > > > >                                        Error **errp)
> > > > > > > Please remove this unrelated whitespace change.
> > > > > > > 
> > > > > > > 
> > > > > > > > @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
> > > > > > > >        return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
> > > > > > > >    }
> > > > > > > > +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> > > > > > > > +                                   const struct iovec *iov,
> > > > > > > > +                                   size_t niov,
> > > > > > > > +                                   Error **errp)
> > > > > > > > +{
> > > > > > > > +   ssize_t len = 0;
> > > > > > > > +   ssize_t total = iov_size(iov, niov);
> > > > > > > > +
> > > > > > > > +   while (len < total) {
> > > > > > > > +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
> > > > > > > > +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
> > > > > > > > +
> > > > > > > > +       if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > > > > > > +            if (qemu_in_coroutine()) {
> > > > > > > > +                qio_channel_yield(ioc, G_IO_IN);
> > > > > > > > +            } else {
> > > > > > > > +                qio_channel_wait(ioc, G_IO_IN);
> > > > > > > > +            }
> > > > > > > > +            continue;
> > > > > > > > +       }
> > > > > > > > +       if (len == 0) {
> > > > > > > > +           return 0;
> > > > > > > > +       }
> > > > > > > > +       if (len < 0) {
> > > > > > > > +           return -1;
> > > > > > > > +       }
> > > > > > > > +   }
> > > > > > > This will busy wait burning CPU where there is a read > 0 and < total.
> > > > > > > 
> > > > > > Daniel, i could use MSG_WAITALL too if that works but then we will lose opportunity to yield. Or if you have some other idea.
> > > > > How easy would this happen?
> > > > > 
> > > > > Another alternative is we could just return the partial len to caller then
> > > > > we fallback to the original channel orders if it happens.  And then if it
> > > > > mostly will never happen it'll behave merely the same as what we want.
> > > > Well we're trying to deal with a bug where the slow and/or unreliable
> > > > network causes channels to arrive in unexpected order. Given we know
> > > > we're having network trouble, I wouldn't want to make more assumptions
> > > > about things happening correctly.
> > > > 
> > > > 
> > > > With regards,
> > > > Daniel
> > > 
> > > 
> > > Peter, I have seen MSG_PEEK used in combination with MSG_WAITALL, but looks like even though chances are less it can still return partial data even with multiple retries for signal case, so is not full proof.
> > > 
> > > *MSG_WAITALL *(since Linux 2.2)
> > >               This flag requests that the operation block until the full
> > >               request is satisfied.  However, the call may still return
> > >               less data than requested if a signal is caught, an error
> > >               or disconnect occurs, or the next data to be received is
> > >               of a different type than that returned.  This flag has no
> > >               effect for datagram sockets.
> > > 
> > > Actual read ahead will be little hackish, so just confirming we all are in agreement to do actual read ahead and i can send patch? :)
> > 
> > Yet another option is the caller handles partial PEEK and then we can sleep
> > in the migration code before another PEEK attempt until it reaches the full
> > length.
> > 
> > Even with that explicit sleep code IMHO it is cleaner than the read-header
> > flag plus things like !tls check just to avoid the handshake dead lock
> > itself (and if to go with this route we'd better also have a full document
> > on why !tls, aka, how the dead lock can happen).
> 
> Nah, I forgot we're in the same condition as in the main thread.. sorry.
> 
> Then how about using qemu_co_sleep_ns_wakeable() to replace
> qio_channel_yield() either above, or in the caller?

A better one is qemu_co_sleep_ns().  Off-topic: I'd even think we should
have a qemu_co_sleep_realtime_ns(), because currently all callers of
qemu_co_sleep_ns() use the rt clock.
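
For example, a rough sketch (not a tested patch; it assumes the flags
parameter and the helper added by this series, and the return values and
the 1ms back-off are only placeholders) that keeps the G_IO_IN wait for the
"no data at all" case but backs off with a sleep when a partial peek came
back:

int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
                                   const struct iovec *iov,
                                   size_t niov,
                                   Error **errp)
{
    ssize_t len = 0;
    ssize_t total = iov_size(iov, niov);

    while (len < total) {
        len = qio_channel_readv_full(ioc, iov, niov, NULL, NULL,
                                     QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);

        if (len == QIO_CHANNEL_ERR_BLOCK) {
            /* Nothing queued yet: waiting for G_IO_IN is fine here. */
            if (qemu_in_coroutine()) {
                qio_channel_yield(ioc, G_IO_IN);
            } else {
                qio_channel_wait(ioc, G_IO_IN);
            }
            continue;
        }
        if (len == 0) {
            return 0;
        }
        if (len < 0) {
            return -1;
        }
        if (len < total) {
            /* Partial peek: the bytes stay queued, so polling G_IO_IN
             * would return immediately and busy-spin.  Back off briefly
             * before peeking again. */
            if (qemu_in_coroutine()) {
                qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000 * 1000); /* 1ms */
            } else {
                g_usleep(1000); /* 1ms */
            }
        }
    }

    return 1;
}

The interval is only illustrative; what delay is acceptable for the
migration handshake is a separate question.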

-- 
Peter Xu



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22 16:33                 ` Peter Xu
@ 2022-11-22 16:42                   ` manish.mishra
  2022-11-22 17:16                     ` Peter Xu
  0 siblings, 1 reply; 24+ messages in thread
From: manish.mishra @ 2022-11-22 16:42 UTC (permalink / raw)
  To: Peter Xu
  Cc: Daniel P. Berrangé,
	qemu-devel, prerna.saxena, quintela, dgilbert, lsoaresp


On 22/11/22 10:03 pm, Peter Xu wrote:
> On Tue, Nov 22, 2022 at 11:29:05AM -0500, Peter Xu wrote:
>> On Tue, Nov 22, 2022 at 11:10:18AM -0500, Peter Xu wrote:
>>> On Tue, Nov 22, 2022 at 09:01:59PM +0530, manish.mishra wrote:
>>>> On 22/11/22 8:19 pm, Daniel P. Berrangé wrote:
>>>>> On Tue, Nov 22, 2022 at 09:41:01AM -0500, Peter Xu wrote:
>>>>>> On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
>>>>>>> On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
>>>>>>>> On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
>>>>>>>>> MSG_PEEK reads from the peek of channel, The data is treated as
>>>>>>>>> unread and the next read shall still return this data. This
>>>>>>>>> support is currently added only for socket class. Extra parameter
>>>>>>>>> 'flags' is added to io_readv calls to pass extra read flags like
>>>>>>>>> MSG_PEEK.
>>>>>>>>>
>>>>>>>>> Suggested-by: Daniel P. Berrangé <berrange@redhat.com
>>>>>>>>> Signed-off-by: manish.mishra<manish.mishra@nutanix.com>
>>>>>>>>> ---
>>>>>>>>>     chardev/char-socket.c               |  4 +-
>>>>>>>>>     include/io/channel.h                | 83 +++++++++++++++++++++++++++++
>>>>>>>>>     io/channel-buffer.c                 |  1 +
>>>>>>>>>     io/channel-command.c                |  1 +
>>>>>>>>>     io/channel-file.c                   |  1 +
>>>>>>>>>     io/channel-null.c                   |  1 +
>>>>>>>>>     io/channel-socket.c                 | 16 +++++-
>>>>>>>>>     io/channel-tls.c                    |  1 +
>>>>>>>>>     io/channel-websock.c                |  1 +
>>>>>>>>>     io/channel.c                        | 73 +++++++++++++++++++++++--
>>>>>>>>>     migration/channel-block.c           |  1 +
>>>>>>>>>     scsi/qemu-pr-helper.c               |  2 +-
>>>>>>>>>     tests/qtest/tpm-emu.c               |  2 +-
>>>>>>>>>     tests/unit/test-io-channel-socket.c |  1 +
>>>>>>>>>     util/vhost-user-server.c            |  2 +-
>>>>>>>>>     15 files changed, 179 insertions(+), 11 deletions(-)
>>>>>>>>> diff --git a/io/channel-socket.c b/io/channel-socket.c
>>>>>>>>> index b76dca9cc1..a06b24766d 100644
>>>>>>>>> --- a/io/channel-socket.c
>>>>>>>>> +++ b/io/channel-socket.c
>>>>>>>>> @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
>>>>>>>>>         }
>>>>>>>>>     #endif /* WIN32 */
>>>>>>>>> +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
>>>>>>>>> +
>>>>>>>> This covers the incoming server side socket.
>>>>>>>>
>>>>>>>> This also needs to be set in outgoing client side socket in
>>>>>>>> qio_channel_socket_connect_async
>>>>>>> Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
>>>>>>>
>>>>>>>>> @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>>>>>>>>>     }
>>>>>>>>>     #endif /* WIN32 */
>>>>>>>>> -
>>>>>>>>>     #ifdef QEMU_MSG_ZEROCOPY
>>>>>>>>>     static int qio_channel_socket_flush(QIOChannel *ioc,
>>>>>>>>>                                         Error **errp)
>>>>>>>> Please remove this unrelated whitespace change.
>>>>>>>>
>>>>>>>>
>>>>>>>>> @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
>>>>>>>>>         return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
>>>>>>>>>     }
>>>>>>>>> +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
>>>>>>>>> +                                   const struct iovec *iov,
>>>>>>>>> +                                   size_t niov,
>>>>>>>>> +                                   Error **errp)
>>>>>>>>> +{
>>>>>>>>> +   ssize_t len = 0;
>>>>>>>>> +   ssize_t total = iov_size(iov, niov);
>>>>>>>>> +
>>>>>>>>> +   while (len < total) {
>>>>>>>>> +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
>>>>>>>>> +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
>>>>>>>>> +
>>>>>>>>> +       if (len == QIO_CHANNEL_ERR_BLOCK) {
>>>>>>>>> +            if (qemu_in_coroutine()) {
>>>>>>>>> +                qio_channel_yield(ioc, G_IO_IN);
>>>>>>>>> +            } else {
>>>>>>>>> +                qio_channel_wait(ioc, G_IO_IN);
>>>>>>>>> +            }
>>>>>>>>> +            continue;
>>>>>>>>> +       }
>>>>>>>>> +       if (len == 0) {
>>>>>>>>> +           return 0;
>>>>>>>>> +       }
>>>>>>>>> +       if (len < 0) {
>>>>>>>>> +           return -1;
>>>>>>>>> +       }
>>>>>>>>> +   }
>>>>>>>> This will busy wait burning CPU where there is a read > 0 and < total.
>>>>>>>>
>>>>>>> Daniel, i could use MSG_WAITALL too if that works but then we will lose opportunity to yield. Or if you have some other idea.
>>>>>> How easy would this happen?
>>>>>>
>>>>>> Another alternative is we could just return the partial len to caller then
>>>>>> we fallback to the original channel orders if it happens.  And then if it
>>>>>> mostly will never happen it'll behave merely the same as what we want.
>>>>> Well we're trying to deal with a bug where the slow and/or unreliable
>>>>> network causes channels to arrive in unexpected order. Given we know
>>>>> we're having network trouble, I wouldn't want to make more assumptions
>>>>> about things happening correctly.
>>>>>
>>>>>
>>>>> With regards,
>>>>> Daniel
>>>>
>>>> Peter, I have seen MSG_PEEK used in combination with MSG_WAITALL, but it looks like, even though the chances are lower, it can still return partial data even with multiple retries in the signal case, so it is not foolproof.
>>>>
>>>> MSG_WAITALL (since Linux 2.2)
>>>>                This flag requests that the operation block until the full
>>>>                request is satisfied.  However, the call may still return
>>>>                less data than requested if a signal is caught, an error
>>>>                or disconnect occurs, or the next data to be received is
>>>>                of a different type than that returned.  This flag has no
>>>>                effect for datagram sockets.
>>>>
>>>> An actual read-ahead will be a little hackish, so just confirming we are all in agreement to do an actual read-ahead, and then I can send the patch? :)
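
For illustration, a minimal sketch of the MSG_PEEK | MSG_WAITALL combination using plain recv(2) on a blocking socket (not the QIOChannel API; peek_full() is just a made-up name for this sketch). Even here the short-return case quoted above still has to be handled:

#include <errno.h>
#include <sys/socket.h>
#include <sys/types.h>

/* Peek @len bytes: 1 when fully peekable, 0 on EOF, -1 on error. */
static int peek_full(int fd, void *buf, size_t len)
{
    for (;;) {
        ssize_t n = recv(fd, buf, len, MSG_PEEK | MSG_WAITALL);

        if (n == (ssize_t)len) {
            return 1;   /* full request is now readable */
        }
        if (n == 0) {
            return 0;   /* peer closed before sending everything */
        }
        if (n < 0 && errno != EINTR) {
            return -1;  /* hard error */
        }
        /* short peek (signal caught) or EINTR: retry */
    }
}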
>>> Yet another option is the caller handles partial PEEK and then we can sleep
>>> in the migration code before another PEEK attempt until it reaches the full
>>> length.
>>>
>>> Even with that explicit sleep code, IMHO it is cleaner than the read-header
>>> flag plus things like the !tls check just to avoid the handshake deadlock
>>> itself (and if we go with this route we'd better also have full documentation
>>> on why !tls, aka, how the deadlock can happen).
>> Nah, I forgot we're in the same condition as in the main thread.. sorry.
>>
>> Then how about using qemu_co_sleep_ns_wakeable() to replace
>> qio_channel_yield() either above, or in the caller?
> A better one is qemu_co_sleep_ns().  Off-topic: I'd even think we should
> have one qemu_co_sleep_realtime_ns() because currently all callers of
I am not aware of this :) , will check it.
> qemu_co_sleep_ns() is for the rt clock.


Yes, that also works, Peter. In that case, should I have a default time or take it from the upper layers? And for live migration, does something on the scale of 1ms work?
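
For concreteness, roughly what that could look like on top of this patch's qio_channel_readv_peek_all_eof() (untested sketch, using the ~1ms figure from above; the non-coroutine g_usleep() fallback and the return convention are only assumptions):

int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
                                   const struct iovec *iov,
                                   size_t niov,
                                   Error **errp)
{
    ssize_t len = 0;
    ssize_t total = iov_size(iov, niov);

    while (len < total) {
        len = qio_channel_readv_full(ioc, iov, niov, NULL, NULL,
                                     QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);

        if (len == QIO_CHANNEL_ERR_BLOCK) {
            if (qemu_in_coroutine()) {
                qio_channel_yield(ioc, G_IO_IN);
            } else {
                qio_channel_wait(ioc, G_IO_IN);
            }
            continue;
        }
        if (len == 0) {
            return 0;
        }
        if (len < 0) {
            return -1;
        }
        if (len < total) {
            /*
             * Partial peek: G_IO_IN is already set, so yielding here
             * would spin.  Back off for ~1ms instead (rt clock).
             */
            if (qemu_in_coroutine()) {
                qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000 * 1000);
            } else {
                g_usleep(1000);
            }
        }
    }
    return 1;   /* assuming the same convention as qio_channel_readv_all_eof() */
}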

Thanks

Manish Mishra

>


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22 16:42                   ` manish.mishra
@ 2022-11-22 17:16                     ` Peter Xu
  2022-11-22 17:31                       ` Daniel P. Berrangé
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Xu @ 2022-11-22 17:16 UTC (permalink / raw)
  To: manish.mishra
  Cc: Daniel P. Berrangé,
	qemu-devel, prerna.saxena, quintela, dgilbert, lsoaresp

On Tue, Nov 22, 2022 at 10:12:25PM +0530, manish.mishra wrote:
> 
> On 22/11/22 10:03 pm, Peter Xu wrote:
> > On Tue, Nov 22, 2022 at 11:29:05AM -0500, Peter Xu wrote:
> > > On Tue, Nov 22, 2022 at 11:10:18AM -0500, Peter Xu wrote:
> > > > On Tue, Nov 22, 2022 at 09:01:59PM +0530, manish.mishra wrote:
> > > > > On 22/11/22 8:19 pm, Daniel P. Berrangé wrote:
> > > > > > On Tue, Nov 22, 2022 at 09:41:01AM -0500, Peter Xu wrote:
> > > > > > > On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
> > > > > > > > On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
> > > > > > > > > On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
> > > > > > > > > > MSG_PEEK reads from the peek of channel, The data is treated as
> > > > > > > > > > unread and the next read shall still return this data. This
> > > > > > > > > > support is currently added only for socket class. Extra parameter
> > > > > > > > > > 'flags' is added to io_readv calls to pass extra read flags like
> > > > > > > > > > MSG_PEEK.
> > > > > > > > > > 
> > > > > > > > > > Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> > > > > > > > > > Signed-off-by: manish.mishra<manish.mishra@nutanix.com>
> > > > > > > > > > ---
> > > > > > > > > >     chardev/char-socket.c               |  4 +-
> > > > > > > > > >     include/io/channel.h                | 83 +++++++++++++++++++++++++++++
> > > > > > > > > >     io/channel-buffer.c                 |  1 +
> > > > > > > > > >     io/channel-command.c                |  1 +
> > > > > > > > > >     io/channel-file.c                   |  1 +
> > > > > > > > > >     io/channel-null.c                   |  1 +
> > > > > > > > > >     io/channel-socket.c                 | 16 +++++-
> > > > > > > > > >     io/channel-tls.c                    |  1 +
> > > > > > > > > >     io/channel-websock.c                |  1 +
> > > > > > > > > >     io/channel.c                        | 73 +++++++++++++++++++++++--
> > > > > > > > > >     migration/channel-block.c           |  1 +
> > > > > > > > > >     scsi/qemu-pr-helper.c               |  2 +-
> > > > > > > > > >     tests/qtest/tpm-emu.c               |  2 +-
> > > > > > > > > >     tests/unit/test-io-channel-socket.c |  1 +
> > > > > > > > > >     util/vhost-user-server.c            |  2 +-
> > > > > > > > > >     15 files changed, 179 insertions(+), 11 deletions(-)
> > > > > > > > > > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > > > > > > > > > index b76dca9cc1..a06b24766d 100644
> > > > > > > > > > --- a/io/channel-socket.c
> > > > > > > > > > +++ b/io/channel-socket.c
> > > > > > > > > > @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
> > > > > > > > > >         }
> > > > > > > > > >     #endif /* WIN32 */
> > > > > > > > > > +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
> > > > > > > > > > +
> > > > > > > > > This covers the incoming server side socket.
> > > > > > > > > 
> > > > > > > > > This also needs to be set in outgoing client side socket in
> > > > > > > > > qio_channel_socket_connect_async
> > > > > > > > Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
> > > > > > > > 
> > > > > > > > > > @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> > > > > > > > > >     }
> > > > > > > > > >     #endif /* WIN32 */
> > > > > > > > > > -
> > > > > > > > > >     #ifdef QEMU_MSG_ZEROCOPY
> > > > > > > > > >     static int qio_channel_socket_flush(QIOChannel *ioc,
> > > > > > > > > >                                         Error **errp)
> > > > > > > > > Please remove this unrelated whitespace change.
> > > > > > > > > 
> > > > > > > > > 
> > > > > > > > > > @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
> > > > > > > > > >         return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
> > > > > > > > > >     }
> > > > > > > > > > +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> > > > > > > > > > +                                   const struct iovec *iov,
> > > > > > > > > > +                                   size_t niov,
> > > > > > > > > > +                                   Error **errp)
> > > > > > > > > > +{
> > > > > > > > > > +   ssize_t len = 0;
> > > > > > > > > > +   ssize_t total = iov_size(iov, niov);
> > > > > > > > > > +
> > > > > > > > > > +   while (len < total) {
> > > > > > > > > > +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
> > > > > > > > > > +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
> > > > > > > > > > +
> > > > > > > > > > +       if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > > > > > > > > +            if (qemu_in_coroutine()) {
> > > > > > > > > > +                qio_channel_yield(ioc, G_IO_IN);
> > > > > > > > > > +            } else {
> > > > > > > > > > +                qio_channel_wait(ioc, G_IO_IN);
> > > > > > > > > > +            }
> > > > > > > > > > +            continue;
> > > > > > > > > > +       }
> > > > > > > > > > +       if (len == 0) {
> > > > > > > > > > +           return 0;
> > > > > > > > > > +       }
> > > > > > > > > > +       if (len < 0) {
> > > > > > > > > > +           return -1;
> > > > > > > > > > +       }
> > > > > > > > > > +   }
> > > > > > > > > This will busy wait burning CPU where there is a read > 0 and < total.
> > > > > > > > > 
> > > > > > > > Daniel, i could use MSG_WAITALL too if that works but then we will lose opportunity to yield. Or if you have some other idea.
> > > > > > > How easy would this happen?
> > > > > > > 
> > > > > > > Another alternative is we could just return the partial len to caller then
> > > > > > > we fallback to the original channel orders if it happens.  And then if it
> > > > > > > mostly will never happen it'll behave merely the same as what we want.
> > > > > > Well we're trying to deal with a bug where the slow and/or unreliable
> > > > > > network causes channels to arrive in unexpected order. Given we know
> > > > > > we're having network trouble, I wouldn't want to make more assumptions
> > > > > > about things happening correctly.
> > > > > > 
> > > > > > 
> > > > > > With regards,
> > > > > > Daniel
> > > > > 
> > > > > Peter, I have seen MSG_PEEK used in combination with MSG_WAITALL, but looks like even though chances are less it can still return partial data even with multiple retries for signal case, so is not full proof.
> > > > > 
> > > > > *MSG_WAITALL *(since Linux 2.2)
> > > > >                This flag requests that the operation block until the full
> > > > >                request is satisfied.  However, the call may still return
> > > > >                less data than requested if a signal is caught, an error
> > > > >                or disconnect occurs, or the next data to be received is
> > > > >                of a different type than that returned.  This flag has no
> > > > >                effect for datagram sockets.
> > > > > 
> > > > > Actual read ahead will be little hackish, so just confirming we all are in agreement to do actual read ahead and i can send patch? :)
> > > > Yet another option is the caller handles partial PEEK and then we can sleep
> > > > in the migration code before another PEEK attempt until it reaches the full
> > > > length.
> > > > 
> > > > Even with that explicit sleep code IMHO it is cleaner than the read-header
> > > > flag plus things like !tls check just to avoid the handshake dead lock
> > > > itself (and if to go with this route we'd better also have a full document
> > > > on why !tls, aka, how the dead lock can happen).
> > > Nah, I forgot we're in the same condition as in the main thread.. sorry.
> > > 
> > > Then how about using qemu_co_sleep_ns_wakeable() to replace
> > > qio_channel_yield() either above, or in the caller?
> > A better one is qemu_co_sleep_ns().  Off-topic: I'd even think we should
> > have one qemu_co_sleep_realtime_ns() because currently all callers of
> I am not aware of this :) , will check it.
> > qemu_co_sleep_ns() is for the rt clock.
> 
> 
> Yes that also works Peter. In that case, should i have a default time or take it from upper layers. And for live migration does something like of scale 1ms works?

Sounds good to me on the migration side.  When making it formal we'd also
want to know what Juan/Dave think.

But let's also wait for Dan's input on this before going forward.  If the
io code wants an _eof() version of PEEK then maybe we'd better do the
timeout-yield there even if it is not as elegant as G_IO_IN.  IIUC it's a
matter of whether we want to allow the PEEK interface to return a partial
len.
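
To spell out the partial-len flavour (sketch only; qio_channel_readv_peek() is not a name from the patch): the io helper would wait only until something is peekable, then hand whatever length it got back to the caller, which decides how to retry:

/* Return the number of bytes currently peekable (possibly < the iov
 * size), 0 on EOF, or -1 on error.  Retry policy is the caller's. */
ssize_t qio_channel_readv_peek(QIOChannel *ioc, const struct iovec *iov,
                               size_t niov, Error **errp)
{
    ssize_t len;

    do {
        len = qio_channel_readv_full(ioc, iov, niov, NULL, NULL,
                                     QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
        if (len == QIO_CHANNEL_ERR_BLOCK) {
            if (qemu_in_coroutine()) {
                qio_channel_yield(ioc, G_IO_IN);
            } else {
                qio_channel_wait(ioc, G_IO_IN);
            }
        }
    } while (len == QIO_CHANNEL_ERR_BLOCK);

    return len;
}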

Thanks,

-- 
Peter Xu



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel
  2022-11-22 17:16                     ` Peter Xu
@ 2022-11-22 17:31                       ` Daniel P. Berrangé
  0 siblings, 0 replies; 24+ messages in thread
From: Daniel P. Berrangé @ 2022-11-22 17:31 UTC (permalink / raw)
  To: Peter Xu
  Cc: manish.mishra, qemu-devel, prerna.saxena, quintela, dgilbert, lsoaresp

On Tue, Nov 22, 2022 at 12:16:08PM -0500, Peter Xu wrote:
> On Tue, Nov 22, 2022 at 10:12:25PM +0530, manish.mishra wrote:
> > 
> > On 22/11/22 10:03 pm, Peter Xu wrote:
> > > On Tue, Nov 22, 2022 at 11:29:05AM -0500, Peter Xu wrote:
> > > > On Tue, Nov 22, 2022 at 11:10:18AM -0500, Peter Xu wrote:
> > > > > On Tue, Nov 22, 2022 at 09:01:59PM +0530, manish.mishra wrote:
> > > > > > On 22/11/22 8:19 pm, Daniel P. Berrangé wrote:
> > > > > > > On Tue, Nov 22, 2022 at 09:41:01AM -0500, Peter Xu wrote:
> > > > > > > > On Tue, Nov 22, 2022 at 02:38:53PM +0530, manish.mishra wrote:
> > > > > > > > > On 22/11/22 2:30 pm, Daniel P. Berrangé wrote:
> > > > > > > > > > On Sat, Nov 19, 2022 at 09:36:14AM +0000, manish.mishra wrote:
> > > > > > > > > > > MSG_PEEK reads from the peek of channel, The data is treated as
> > > > > > > > > > > unread and the next read shall still return this data. This
> > > > > > > > > > > support is currently added only for socket class. Extra parameter
> > > > > > > > > > > 'flags' is added to io_readv calls to pass extra read flags like
> > > > > > > > > > > MSG_PEEK.
> > > > > > > > > > > 
> > > > > > > > > > > Suggested-by: Daniel P. Berrangé <berrange@redhat.com
> > > > > > > > > > > Signed-off-by: manish.mishra<manish.mishra@nutanix.com>
> > > > > > > > > > > ---
> > > > > > > > > > >     chardev/char-socket.c               |  4 +-
> > > > > > > > > > >     include/io/channel.h                | 83 +++++++++++++++++++++++++++++
> > > > > > > > > > >     io/channel-buffer.c                 |  1 +
> > > > > > > > > > >     io/channel-command.c                |  1 +
> > > > > > > > > > >     io/channel-file.c                   |  1 +
> > > > > > > > > > >     io/channel-null.c                   |  1 +
> > > > > > > > > > >     io/channel-socket.c                 | 16 +++++-
> > > > > > > > > > >     io/channel-tls.c                    |  1 +
> > > > > > > > > > >     io/channel-websock.c                |  1 +
> > > > > > > > > > >     io/channel.c                        | 73 +++++++++++++++++++++++--
> > > > > > > > > > >     migration/channel-block.c           |  1 +
> > > > > > > > > > >     scsi/qemu-pr-helper.c               |  2 +-
> > > > > > > > > > >     tests/qtest/tpm-emu.c               |  2 +-
> > > > > > > > > > >     tests/unit/test-io-channel-socket.c |  1 +
> > > > > > > > > > >     util/vhost-user-server.c            |  2 +-
> > > > > > > > > > >     15 files changed, 179 insertions(+), 11 deletions(-)
> > > > > > > > > > > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > > > > > > > > > > index b76dca9cc1..a06b24766d 100644
> > > > > > > > > > > --- a/io/channel-socket.c
> > > > > > > > > > > +++ b/io/channel-socket.c
> > > > > > > > > > > @@ -406,6 +406,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
> > > > > > > > > > >         }
> > > > > > > > > > >     #endif /* WIN32 */
> > > > > > > > > > > +    qio_channel_set_feature(QIO_CHANNEL(cioc), QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
> > > > > > > > > > > +
> > > > > > > > > > This covers the incoming server side socket.
> > > > > > > > > > 
> > > > > > > > > > This also needs to be set in outgoing client side socket in
> > > > > > > > > > qio_channel_socket_connect_async
> > > > > > > > > Yes sorry, i considered only current use-case, but as it is generic one both should be there. Thanks will update it.
> > > > > > > > > 
> > > > > > > > > > > @@ -705,7 +718,6 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> > > > > > > > > > >     }
> > > > > > > > > > >     #endif /* WIN32 */
> > > > > > > > > > > -
> > > > > > > > > > >     #ifdef QEMU_MSG_ZEROCOPY
> > > > > > > > > > >     static int qio_channel_socket_flush(QIOChannel *ioc,
> > > > > > > > > > >                                         Error **errp)
> > > > > > > > > > Please remove this unrelated whitespace change.
> > > > > > > > > > 
> > > > > > > > > > 
> > > > > > > > > > > @@ -109,6 +117,37 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
> > > > > > > > > > >         return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
> > > > > > > > > > >     }
> > > > > > > > > > > +int qio_channel_readv_peek_all_eof(QIOChannel *ioc,
> > > > > > > > > > > +                                   const struct iovec *iov,
> > > > > > > > > > > +                                   size_t niov,
> > > > > > > > > > > +                                   Error **errp)
> > > > > > > > > > > +{
> > > > > > > > > > > +   ssize_t len = 0;
> > > > > > > > > > > +   ssize_t total = iov_size(iov, niov);
> > > > > > > > > > > +
> > > > > > > > > > > +   while (len < total) {
> > > > > > > > > > > +       len = qio_channel_readv_full(ioc, iov, niov, NULL,
> > > > > > > > > > > +                                    NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
> > > > > > > > > > > +
> > > > > > > > > > > +       if (len == QIO_CHANNEL_ERR_BLOCK) {
> > > > > > > > > > > +            if (qemu_in_coroutine()) {
> > > > > > > > > > > +                qio_channel_yield(ioc, G_IO_IN);
> > > > > > > > > > > +            } else {
> > > > > > > > > > > +                qio_channel_wait(ioc, G_IO_IN);
> > > > > > > > > > > +            }
> > > > > > > > > > > +            continue;
> > > > > > > > > > > +       }
> > > > > > > > > > > +       if (len == 0) {
> > > > > > > > > > > +           return 0;
> > > > > > > > > > > +       }
> > > > > > > > > > > +       if (len < 0) {
> > > > > > > > > > > +           return -1;
> > > > > > > > > > > +       }
> > > > > > > > > > > +   }
> > > > > > > > > > This will busy wait burning CPU where there is a read > 0 and < total.
> > > > > > > > > > 
> > > > > > > > > Daniel, i could use MSG_WAITALL too if that works but then we will lose opportunity to yield. Or if you have some other idea.
> > > > > > > > How easy would this happen?
> > > > > > > > 
> > > > > > > > Another alternative is we could just return the partial len to caller then
> > > > > > > > we fallback to the original channel orders if it happens.  And then if it
> > > > > > > > mostly will never happen it'll behave merely the same as what we want.
> > > > > > > Well we're trying to deal with a bug where the slow and/or unreliable
> > > > > > > network causes channels to arrive in unexpected order. Given we know
> > > > > > > we're having network trouble, I wouldn't want to make more assumptions
> > > > > > > about things happening correctly.
> > > > > > > 
> > > > > > > 
> > > > > > > With regards,
> > > > > > > Daniel
> > > > > > 
> > > > > > Peter, I have seen MSG_PEEK used in combination with MSG_WAITALL, but looks like even though chances are less it can still return partial data even with multiple retries for signal case, so is not full proof.
> > > > > > 
> > > > > > *MSG_WAITALL *(since Linux 2.2)
> > > > > >                This flag requests that the operation block until the full
> > > > > >                request is satisfied.  However, the call may still return
> > > > > >                less data than requested if a signal is caught, an error
> > > > > >                or disconnect occurs, or the next data to be received is
> > > > > >                of a different type than that returned.  This flag has no
> > > > > >                effect for datagram sockets.
> > > > > > 
> > > > > > Actual read ahead will be little hackish, so just confirming we all are in agreement to do actual read ahead and i can send patch? :)
> > > > > Yet another option is the caller handles partial PEEK and then we can sleep
> > > > > in the migration code before another PEEK attempt until it reaches the full
> > > > > length.
> > > > > 
> > > > > Even with that explicit sleep code IMHO it is cleaner than the read-header
> > > > > flag plus things like !tls check just to avoid the handshake dead lock
> > > > > itself (and if to go with this route we'd better also have a full document
> > > > > on why !tls, aka, how the dead lock can happen).
> > > > Nah, I forgot we're in the same condition as in the main thread.. sorry.
> > > > 
> > > > Then how about using qemu_co_sleep_ns_wakeable() to replace
> > > > qio_channel_yield() either above, or in the caller?
> > > A better one is qemu_co_sleep_ns().  Off-topic: I'd even think we should
> > > have one qemu_co_sleep_realtime_ns() because currently all callers of
> > I am not aware of this :) , will check it.
> > > qemu_co_sleep_ns() is for the rt clock.
> > 
> > 
> > Yes that also works Peter. In that case, should i have a default time or take it from upper layers. And for live migration does something like of scale 1ms works?
> 
> Sounds good to me on the migration side.  When making it formal we'd also
> want to know what Juan/Dave think.
> 
> But let's also wait for Dan's input on this before going forward.  If the
> io code wants an _eof() version of PEEK then maybe we'd better do the
> timeout-yield there even if it is not as elegant as G_IO_IN.  IIUC it's a
> matter of whether we want to allow the PEEK interface to return a partial
> len.

I don't think we should add an _eof() version with PEEK, because it's
impossible to implement sanely. If the migration caller wants to busy-wait
or do a coroutine sleep, it can do that.
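
E.g. something along these lines on the migration side (sketch only, untested; qio_channel_readv_peek() is the hypothetical partial-len helper sketched earlier in the thread, migration_peek_channel_magic() is a made-up name, and the ~1ms back-off is the value discussed above):

static int migration_peek_channel_magic(QIOChannel *ioc, uint32_t *magic,
                                        Error **errp)
{
    struct iovec iov = { .iov_base = magic, .iov_len = sizeof(*magic) };

    for (;;) {
        ssize_t len = qio_channel_readv_peek(ioc, &iov, 1, errp);

        if (len < 0) {
            return -1;
        }
        if (len == 0) {
            error_setg(errp, "channel closed before sending magic");
            return -1;
        }
        if ((size_t)len >= sizeof(*magic)) {
            return 0;   /* full magic is peekable; read/decode it now */
        }
        /* Partial peek: back off ~1ms rather than spinning on G_IO_IN */
        if (qemu_in_coroutine()) {
            qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000 * 1000);
        } else {
            g_usleep(1000);
        }
    }
}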

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2022-11-22 17:33 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-19  9:36 [PATCH 1/2] io: Add support for MSG_PEEK for socket channel manish.mishra
2022-11-19  9:36 ` [PATCH 2/2] migration: check magic value for deciding the mapping of channels manish.mishra
2022-11-19  9:36 ` manish.mishra
2022-11-19  9:36 ` [PATCH v3 1/2] io: Add support for MSG_PEEK for socket channel manish.mishra
2022-11-22  9:00   ` Daniel P. Berrangé
2022-11-22  9:08     ` manish.mishra
2022-11-22  9:29       ` Daniel P. Berrangé
2022-11-22  9:40         ` manish.mishra
2022-11-22  9:53           ` Daniel P. Berrangé
2022-11-22 10:13             ` manish.mishra
2022-11-22 10:31               ` Daniel P. Berrangé
2022-11-22 14:41       ` Peter Xu
2022-11-22 14:49         ` Daniel P. Berrangé
2022-11-22 15:31           ` manish.mishra
2022-11-22 16:10             ` Peter Xu
2022-11-22 16:29               ` Peter Xu
2022-11-22 16:33                 ` Peter Xu
2022-11-22 16:42                   ` manish.mishra
2022-11-22 17:16                     ` Peter Xu
2022-11-22 17:31                       ` Daniel P. Berrangé
2022-11-19  9:36 ` [PATCH v3 2/2] migration: check magic value for deciding the mapping of channels manish.mishra
2022-11-21 21:59   ` Peter Xu
2022-11-22  9:01   ` Daniel P. Berrangé
2022-11-19  9:40 ` [PATCH 1/2] io: Add support for MSG_PEEK for socket channel manish.mishra
