All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
@ 2018-11-13 12:28 Anthony PERARD
  2018-11-13 13:14 ` Ian Jackson
  0 siblings, 1 reply; 15+ messages in thread
From: Anthony PERARD @ 2018-11-13 12:28 UTC (permalink / raw)
  To: xen-devel; +Cc: Anthony PERARD, Wei Liu, Ian Jackson

Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
 tools/libxl/libxl_internal.h |  35 ++
 tools/libxl/libxl_qmp.c      | 683 +++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_types.idl  |   6 +
 3 files changed, 724 insertions(+)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 2b7e343775..fd84b60982 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -411,12 +411,47 @@ _hidden int libxl__ev_qmp_send(libxl__gc *gc, libxl__ev_qmp *ev,
                                const char *cmd, libxl__json_object *args);
 _hidden void libxl__ev_qmp_dispose(libxl__gc *gc, libxl__ev_qmp *ev);
 
+typedef enum {
+    /* initial state */
+    qmp_state_disconnected = 1,
+    /* connected to QMP socket, waiting for greeting message */
+    qmp_state_connecting,
+    /* qmp_capabilities command sent, waiting for reply */
+    qmp_state_capability_negotiation,
+    /* ready to send commands */
+    qmp_state_connected,
+    /* cmd sent, waiting for reply */
+    qmp_state_waiting_reply,
+} libxl__qmp_state;
+
 struct libxl__ev_qmp {
     /* caller should include this in their own struct */
     /* caller must fill these in, and they must all remain valid */
     libxl_domid domid;
     libxl__ev_qmp_callback *callback;
     int fd; /* set to send a fd with the command, -1 otherwise */
+
+    /*
+     * remaining fields are private to libxl_ev_qmp_*
+     */
+
+    int id;
+    libxl__carefd *qmp_cfd;
+    libxl__ev_fd qmp_efd;
+    libxl__qmp_state qmp_state;
+    /* receive buffer, with:
+     * rx_buf_size: current allocated size,
+     * rx_buf_used: actual data in the buffer */
+    char *rx_buf;
+    size_t rx_buf_size;
+    size_t rx_buf_used;
+    /* sending buffer */
+    char *tx_buf;
+    size_t tx_buf_len;
+    size_t tx_buf_off;
+    /* The message to send when ready */
+    char *msg;
+    size_t msg_len;
 };
 
 
diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c
index 218ee3ff09..07aa0dd89a 100644
--- a/tools/libxl/libxl_qmp.c
+++ b/tools/libxl/libxl_qmp.c
@@ -75,11 +75,18 @@
 #  define DEBUG_REPORT_RECEIVED(dom, buf, len) ((void)0)
 #endif
 
+#ifdef DEBUG_QMP_CLIENT
+#  define LOG_QMP(f, ...) LOGD(DEBUG, ev->domid, f, ##__VA_ARGS__)
+#else
+#  define LOG_QMP(f, ...)
+#endif
+
 /*
  * QMP types & constant
  */
 
 #define QMP_RECEIVE_BUFFER_SIZE 4096
+#define QMP_MAX_SIZE_RX_BUF MB(8)
 #define PCI_PT_QDEV_ID "pci-pt-%02x_%02x.%01x"
 
 /*
@@ -1312,6 +1319,682 @@ int libxl__qmp_initializations(libxl__gc *gc, uint32_t domid,
     return ret;
 }
 
+/* ------------ Implementation of libxl__ev_qmp ---------------- */
+
+/*
+ * Possible internal state compared to qmp_state:
+ *
+ * qmp_state  disconnected connecting capability   connected  waiting
+ *                                    _negotiation            _reply
+ * External   Idle         Active     Active       Connected  Active
+ * qmp_cfd    close        open       open         open       open
+ * qmp_efd    Idle         Active     Active       Active     Active
+ * id         reset        set        set          prev[1]    set
+ * rx_buf*    free         used       used         used       used
+ * tx_buf*    free         free       used/free    free       used/free
+ * msg*       free         set        set          free/set   free
+ *
+ * [1] id used on the previously sent command
+ *
+ * Possible buffers states:
+ * - receiving buffer:
+ *                    free   used
+ *   rx_buf           NULL   allocated
+ *   rx_buf_size      0      allocation size of `rx_buf`
+ *   rx_buf_off       0      <= rx_buf_size
+ * - transmitted buffer:
+ *                    free   used
+ *   tx_buf           NULL   contain data
+ *   tx_buf_len       0      size of data
+ *   tx_buf_off       0      <= tx_buf_len
+ * - queued user command:
+ *                    free  set
+ *   msg              NULL  contain data
+ *   msg_len          0     size of data
+ *
+ * - Allowed internal state transition:
+ * disconnected                         -> connecting
+ * connection                           -> capability_negotiation
+ * capability_negotiation/waiting_reply -> connected
+ * connected                            -> waiting_reply
+ * any                                  -> disconnected
+ */
+
+/* hard coded message ID used for capability negotiation ("xlq\0") */
+#define QMP_CAPABILITY_NEGOTIATION_MSGID 0x786c7100
+
+/* prototypes */
+
+static void qmp_ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev_fd,
+                               int fd, short events, short revents);
+static int qmp_ev_callback_writable(libxl__gc *gc,
+                                    libxl__ev_qmp *ev, int fd);
+static int qmp_ev_callback_readable(libxl__egc *egc,
+                                    libxl__ev_qmp *ev, int fd);
+static int qmp_ev_get_next_msg(libxl__egc *egc, libxl__ev_qmp *ev,
+                               libxl__json_object **o_r);
+static int qmp_ev_handle_message(libxl__egc *egc,
+                                 libxl__ev_qmp *ev,
+                                 const libxl__json_object *resp);
+
+/* helpers */
+
+static void qmp_ev_ensure_reading_writing(libxl__gc *gc, libxl__ev_qmp *ev)
+{
+    bool enable = false;
+    short events;
+
+    if (ev->tx_buf) {
+        enable = true;
+    } else {
+        enable = (ev->qmp_state == qmp_state_connected) && ev->msg;
+    }
+
+    if (enable)
+        events = ev->qmp_efd.events | POLLOUT;
+    else
+        events = ev->qmp_efd.events & ~POLLOUT;
+
+    libxl__ev_fd_modify(gc, &ev->qmp_efd, events);
+}
+
+static void qmp_ev_set_state(libxl__gc *gc, libxl__ev_qmp *ev,
+                             libxl__qmp_state new_state)
+{
+    libxl__qmp_state cur = ev->qmp_state;
+    switch (new_state) {
+    case qmp_state_disconnected:
+        break;
+    case qmp_state_connecting:
+        assert(cur == qmp_state_disconnected);
+        break;
+    case qmp_state_capability_negotiation:
+        assert(cur == qmp_state_connecting);
+        break;
+    case qmp_state_connected:
+        assert(cur == qmp_state_capability_negotiation ||
+               cur == qmp_state_waiting_reply);
+        break;
+    case qmp_state_waiting_reply:
+        assert(cur == qmp_state_connected);
+        break;
+    }
+
+    ev->qmp_state = new_state;
+}
+
+static int qmp_error_class_to_libxl_error_code(libxl__gc *gc,
+                                               const char *eclass)
+{
+    const libxl_enum_string_table *t = libxl_error_string_table;
+
+    /* compare "QMP_GENERIC_ERROR" from libxl_error to "GenericError"
+     * generated by the QMP server */
+
+    for ( ; t->s; t++) {
+            const char *s = eclass;
+            const char *se = t->s;
+        if (strncasecmp(t->s, "QMP_", 4))
+            continue;
+
+        /* skip "QMP_" */
+        se += 4;
+        while (*s && *se) {
+            /* skip underscores */
+            if (*se == '_') {
+                se++;
+                continue;
+            }
+            if (tolower(*s) != tolower(*se))
+                break;
+            s++, se++;
+        }
+        if (!*s && !*se)
+            return t->v;
+    }
+
+    return ERROR_UNKNOWN_QMP_ERROR;
+}
+
+static int qmp_ev_prepare_cmd(libxl__gc *gc,
+                              libxl__ev_qmp *ev,
+                              const char *cmd,
+                              const libxl__json_object *args)
+{
+    char *buf = NULL;
+    size_t len;
+
+    assert(!ev->tx_buf && !ev->tx_buf_len);
+    assert(!ev->msg && !ev->msg_len);
+
+    ev->id++;
+    buf = qmp_prepare_cmd(gc, cmd, args, ev->id, &len);
+    if (!buf) {
+        return ERROR_FAIL;
+    }
+
+    ev->msg = buf;
+    ev->msg_len = len;
+
+    return 0;
+}
+
+/* Setup connection */
+
+static int qmp_ev_connect(libxl__gc *gc, libxl__ev_qmp *ev)
+    /* disconnected -> connecting
+     * If the initial state isn't disconnected, then nothing is done */
+{
+    int fd;
+    int rc, r;
+    struct sockaddr_un un;
+    const char *qmp_socket_path;
+
+    if (ev->qmp_state != qmp_state_disconnected)
+        return 0;
+
+    qmp_socket_path = libxl__qemu_qmp_path(gc, ev->domid);
+
+    LOGD(DEBUG, ev->domid, "Connecting to %s", qmp_socket_path);
+
+    libxl__carefd_begin();
+    fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    ev->qmp_cfd = libxl__carefd_opened(CTX, fd);
+    if (!ev->qmp_cfd) {
+        LOGED(ERROR, ev->domid, "socket() failed");
+        rc = ERROR_FAIL;
+        goto out;
+    }
+    rc = libxl_fd_set_nonblock(CTX, libxl__carefd_fd(ev->qmp_cfd), 1);
+    if (rc)
+        goto out;
+
+    rc = libxl__prepare_sockaddr_un(gc, &un, qmp_socket_path,
+                                    "QMP socket");
+    if (rc)
+        goto out;
+
+    r = connect(libxl__carefd_fd(ev->qmp_cfd),
+                (struct sockaddr *) &un, sizeof(un));
+    if (r && errno != EINPROGRESS) {
+        LOGED(ERROR, ev->domid, "Failed to connect to QMP socket %s",
+              qmp_socket_path);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    rc = libxl__ev_fd_register(gc, &ev->qmp_efd, qmp_ev_fd_callback,
+                               libxl__carefd_fd(ev->qmp_cfd), POLLIN);
+    if (rc)
+        goto out;
+
+    qmp_ev_set_state(gc, ev, qmp_state_connecting);
+
+    return 0;
+
+out:
+    libxl__carefd_close(ev->qmp_cfd);
+    ev->qmp_cfd = NULL;
+    return rc;
+}
+
+/* QMP FD callbacks */
+
+static void qmp_ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev_fd,
+                               int fd, short events, short revents)
+{
+    EGC_GC;
+    int rc;
+    libxl__json_object *o = NULL;
+    libxl__ev_qmp *ev = CONTAINER_OF(ev_fd, *ev, qmp_efd);
+
+    if (revents & (POLLHUP)) {
+        LOGD(ERROR, ev->domid, "received POLLHUP from QMP socket");
+        rc = ERROR_PROTOCOL_ERROR_QMP;
+        goto out;
+    }
+    if (revents & ~(POLLIN|POLLOUT)) {
+        LOGD(ERROR, ev->domid,
+             "unexpected poll event 0x%x on QMP socket (expected POLLIN "
+             "and/or POLLOUT)",
+            revents);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    if (revents & POLLOUT) {
+        rc = qmp_ev_callback_writable(gc, ev, fd);
+        if (rc)
+            goto out;
+    }
+
+    if (revents & POLLIN) {
+        rc = qmp_ev_callback_readable(egc, ev, fd);
+        if (rc)
+            goto out;
+
+        /* parse input */
+        while (1) {
+            /* parse rx buffer to find one json object */
+            rc = qmp_ev_get_next_msg(egc, ev, &o);
+            if (rc == ERROR_NOTFOUND) {
+                rc = 0;
+                break;
+            } else if (rc)
+                goto out;
+
+            /* Must be last and return when the user callback is called */
+            rc = qmp_ev_handle_message(egc, ev, o);
+            if (rc < 0)
+                goto out;
+            if (rc == 1) {
+                /* user callback has been called */
+                return;
+            }
+        }
+    }
+
+    qmp_ev_ensure_reading_writing(gc, ev);
+
+out:
+    if (rc) {
+        LOGD(ERROR, ev->domid,
+             "Error happend with the QMP connection to QEMU");
+
+        /* On error, deallocate all private ressources */
+        libxl__ev_qmp_dispose(gc, ev);
+
+        /* And tell libxl__ev_qmp user about the error */
+        ev->callback(egc, ev, NULL, rc); /* must be last */
+    }
+}
+
+static int qmp_ev_callback_writable(libxl__gc *gc,
+                                    libxl__ev_qmp *ev, int fd)
+    /* connected -> waiting_reply
+     * the state isn't change otherwise. */
+{
+    int rc;
+    ssize_t r;
+
+    if (ev->qmp_state == qmp_state_connected) {
+        assert(!ev->tx_buf);
+        if (ev->msg) {
+            ev->tx_buf = ev->msg;
+            ev->tx_buf_len = ev->msg_len;
+            ev->tx_buf_off = 0;
+            ev->msg = NULL;
+            ev->msg_len = 0;
+            qmp_ev_set_state(gc, ev, qmp_state_waiting_reply);
+        }
+    }
+
+    if (!ev->tx_buf)
+        return 0;
+
+    LOG_QMP("sending: '%.*s'", (int)ev->tx_buf_len, ev->tx_buf);
+
+    /*
+     * We will send a file descriptor associated with a command on the
+     * first byte of this command.
+     */
+    if (ev->qmp_state == qmp_state_waiting_reply &&
+        ev->fd >= 0 &&
+        ev->tx_buf_off == 0) {
+
+        rc = libxl__sendmsg_fds(gc, fd, ev->tx_buf, 1,
+                                1, &ev->fd, "QMP socket");
+        /* Check for EWOULDBLOCK, and return to try again later */
+        if (rc == ERROR_NOT_READY)
+            return 0;
+        if (rc)
+            return rc;
+        ev->tx_buf_off++;
+    }
+
+    while (ev->tx_buf_off < ev->tx_buf_len) {
+        r = write(fd, ev->tx_buf + ev->tx_buf_off,
+                  ev->tx_buf_len - ev->tx_buf_off);
+        if (r < 0) {
+            if (errno == EINTR)
+                continue;
+            if (errno == EWOULDBLOCK)
+                break;
+            LOGED(ERROR, ev->domid, "failed to write to QMP socket");
+            return ERROR_FAIL;
+        }
+        ev->tx_buf_off += r;
+    }
+
+    if (ev->tx_buf_off == ev->tx_buf_len) {
+        free(ev->tx_buf);
+        ev->tx_buf = NULL;
+        ev->tx_buf_len = ev->tx_buf_off = 0;
+    }
+
+    return 0;
+}
+
+static int qmp_ev_callback_readable(libxl__egc *egc,
+                                    libxl__ev_qmp *ev, int fd)
+    /* on error: * -> disconnected */
+{
+    EGC_GC;
+
+    while (1) {
+        ssize_t r;
+
+        /* Check if the buffer still have space, or increase size */
+        if (ev->rx_buf_size - ev->rx_buf_used < QMP_RECEIVE_BUFFER_SIZE) {
+            ev->rx_buf_size = max(ev->rx_buf_size * 2,
+                               (size_t)QMP_RECEIVE_BUFFER_SIZE * 2);
+            assert(ev->rx_buf_size <= QMP_MAX_SIZE_RX_BUF);
+            if (ev->rx_buf_size > QMP_MAX_SIZE_RX_BUF) {
+                LOGD(ERROR, ev->domid,
+                     "QMP receive buffer is too big (%ld > %lld)",
+                     ev->rx_buf_size, QMP_MAX_SIZE_RX_BUF);
+                return ERROR_BUFFERFULL;
+            }
+            ev->rx_buf = libxl__realloc(NOGC, ev->rx_buf, ev->rx_buf_size);
+        }
+
+        r = read(fd, ev->rx_buf + ev->rx_buf_used,
+                 ev->rx_buf_size - ev->rx_buf_used);
+        if (r < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+            if (errno == EWOULDBLOCK) {
+                break;
+            }
+            LOGED(ERROR, ev->domid, "error reading QMP socket");
+            return ERROR_FAIL;
+        }
+
+        if (r == 0) {
+            LOGD(ERROR, ev->domid, "Unexpected EOF on QMP socket");
+            return ERROR_PROTOCOL_ERROR_QMP;
+        }
+
+        LOG_QMP("received %ldB: '%.*s'", r,
+                (int)r, ev->rx_buf + ev->rx_buf_used);
+
+        ev->rx_buf_used += r;
+        assert(ev->rx_buf_used <= ev->rx_buf_size);
+    }
+
+    return 0;
+}
+
+/* Handle messages received from QMP server */
+
+static int qmp_ev_get_next_msg(libxl__egc *egc, libxl__ev_qmp *ev,
+                               libxl__json_object **o_r)
+    /* Find a JSON object and store it in o_r.
+     * return ERROR_NOTFOUND if no object is found.
+     * `o_r` is allocated within `egc`.
+     */
+{
+    EGC_GC;
+    size_t len;
+    char *end = NULL;
+    libxl__json_object *o = NULL;
+
+    if (!ev->rx_buf_used)
+        return ERROR_NOTFOUND;
+
+    /* Search for the end of a QMP message: "\r\n" */
+    end = memmem(ev->rx_buf, ev->rx_buf_used, "\r\n", 2);
+    if (!end)
+        return ERROR_NOTFOUND;
+    len = (end - ev->rx_buf) + 2;
+
+    LOG_QMP("parsing %luB: '%.*s'", len, (int)len, ev->rx_buf);
+
+    /* Replace \r by \0 so that libxl__json_parse can use strlen */
+    ev->rx_buf[len - 2] = '\0';
+    o = libxl__json_parse(gc, ev->rx_buf);
+
+    if (!o) {
+        LOGD(ERROR, ev->domid, "Parse error");
+        return ERROR_PROTOCOL_ERROR_QMP;
+    }
+
+    ev->rx_buf_used -= len;
+    memmove(ev->rx_buf, ev->rx_buf + len, ev->rx_buf_used);
+
+    LOG_QMP("JSON object received: %s",
+            libxl__json_object_to_json(gc, o));
+
+    *o_r = o;
+
+    return 0;
+}
+
+static int qmp_ev_parse_error_messages(libxl__egc *egc,
+                                       libxl__ev_qmp *ev,
+                                       const libxl__json_object *resp)
+{
+    EGC_GC;
+    int rc;
+    const char *s;
+    const libxl__json_object *o;
+    const libxl__json_object *err;
+
+    /*
+     * { "error": { "class": string, "desc": string } }
+     */
+
+    err = libxl__json_map_get("error", resp, JSON_MAP);
+
+    o = libxl__json_map_get("class", err, JSON_STRING);
+    if (!o) {
+        LOGD(ERROR, ev->domid,
+             "Protocol error: missing \"class\" member in error message");
+        return ERROR_PROTOCOL_ERROR_QMP;
+    }
+    s = libxl__json_object_get_string(o);
+    if (s)
+        rc = qmp_error_class_to_libxl_error_code(gc, s);
+    else
+        rc = ERROR_PROTOCOL_ERROR_QMP;
+
+    o = libxl__json_map_get("desc", err, JSON_STRING);
+    if (!o) {
+        LOGD(ERROR, ev->domid,
+             "Protocol error: missing \"desc\" member in error message");
+        return ERROR_PROTOCOL_ERROR_QMP;
+    }
+    s = libxl__json_object_get_string(o);
+    if (s)
+        LOGD(ERROR, ev->domid, "%s", s);
+    else
+        LOGD(ERROR, ev->domid, "Received unexpected error: %s",
+             libxl__json_object_to_json(gc, resp));
+    return rc;
+}
+
+static int qmp_ev_handle_message(libxl__egc *egc,
+                                 libxl__ev_qmp *ev,
+                                 const libxl__json_object *resp)
+    /*
+     * This function will handle every messages sent by the QMP server.
+     * Return values:
+     *   < 0    libxl error code
+     *   0      success
+     *   1      success, but a user callback has been called,
+     *          `ev` should not be used anymore.
+     *
+     * Possible state changes:
+     * connecting -> capability_negotiation
+     * capability_negotiation -> connected
+     * waiting_reply -> waiting_reply/connected
+     * on error: * -> disconnected
+     */
+{
+    EGC_GC;
+    int id;
+    int rc;
+    const libxl__json_object *o;
+    const libxl__json_object *response;
+    libxl__qmp_message_type type = qmp_response_type(resp);
+
+    switch (type) {
+    case LIBXL__QMP_MESSAGE_TYPE_QMP:
+        /* greeting message */
+
+        if (ev->qmp_state != qmp_state_connecting) {
+            LOGD(ERROR, ev->domid,
+                 "Unexpected greeting message received");
+            return ERROR_PROTOCOL_ERROR_QMP;
+        }
+
+        /* Prepare next message to send */
+        assert(!ev->tx_buf);
+        ev->tx_buf = qmp_prepare_cmd(gc, "qmp_capabilities", NULL,
+                              QMP_CAPABILITY_NEGOTIATION_MSGID,
+                              &ev->tx_buf_len);
+        ev->tx_buf_off = 0;
+        qmp_ev_set_state(gc, ev, qmp_state_capability_negotiation);
+
+        return 0;
+
+    case LIBXL__QMP_MESSAGE_TYPE_RETURN:
+    case LIBXL__QMP_MESSAGE_TYPE_ERROR:
+        /*
+         * Reply to a command (success/error) or server error
+         *
+         * In this cases, we are parsing two possibles responses:
+         * - success:
+         * { "return": json-value, "id": int }
+         * - error:
+         * { "error": { "class": string, "desc": string }, "id": int }
+         */
+
+        o = libxl__json_map_get("id", resp, JSON_INTEGER);
+        if (!o) {
+            /*
+             * If "id" isn't present, an error occur on the server before
+             * it has read the "id" provided by libxl.
+             */
+            qmp_ev_parse_error_messages(egc, ev, resp);
+            return ERROR_PROTOCOL_ERROR_QMP;
+        }
+
+        id = libxl__json_object_get_integer(o);
+
+        if (id == QMP_CAPABILITY_NEGOTIATION_MSGID) {
+            /* We have a response to our qmp_capabilities cmd */
+            if (ev->qmp_state != qmp_state_capability_negotiation ||
+                type != LIBXL__QMP_MESSAGE_TYPE_RETURN)
+                goto out_unknown_id;
+            qmp_ev_set_state(gc, ev, qmp_state_connected);
+            return 0;
+        }
+
+        if (ev->qmp_state == qmp_state_waiting_reply &&
+            id == ev->id) {
+            if (type == LIBXL__QMP_MESSAGE_TYPE_RETURN) {
+                response = libxl__json_map_get("return", resp, JSON_ANY);
+                rc = 0;
+            } else {
+                /* error message */
+                response = NULL;
+                rc = qmp_ev_parse_error_messages(egc, ev, resp);
+            }
+            qmp_ev_set_state(gc, ev, qmp_state_connected);
+            ev->callback(egc, ev, response, rc); /* must be last */
+            return 1;
+        }
+
+out_unknown_id:
+        LOGD(ERROR, ev->domid,
+             "Message from QEMU with unexpected id %d: %s",
+             id, libxl__json_object_to_json(gc, resp));
+        return ERROR_PROTOCOL_ERROR_QMP;
+
+    case LIBXL__QMP_MESSAGE_TYPE_EVENT:
+        /* Events are ignored */
+        return 0;
+
+    case LIBXL__QMP_MESSAGE_TYPE_INVALID:
+        LOGD(ERROR, ev->domid, "Unexpected message received: %s",
+             libxl__json_object_to_json(gc, resp));
+        return ERROR_PROTOCOL_ERROR_QMP;
+
+    default:
+        abort();
+    }
+
+    return 0;
+}
+
+/*
+ * libxl__ev_qmp_*
+ */
+
+void libxl__ev_qmp_init(libxl__ev_qmp *ev)
+    /* disconnected -> disconnected */
+{
+    ev->id = QMP_CAPABILITY_NEGOTIATION_MSGID + 1;
+
+    ev->qmp_cfd = NULL;
+    libxl__ev_fd_init(&ev->qmp_efd);
+    ev->qmp_state = qmp_state_disconnected;
+
+    ev->rx_buf = NULL;
+    ev->rx_buf_size = ev->rx_buf_used = 0;
+    ev->tx_buf = NULL;
+    ev->tx_buf_len = ev->tx_buf_off = 0;
+
+    ev->msg = NULL;
+    ev->msg_len = 0;
+}
+
+int libxl__ev_qmp_send(libxl__gc *gc, libxl__ev_qmp *ev,
+                       const char *cmd, libxl__json_object *args)
+    /* disconnected -> connecting
+     * connected -> waiting_reply */
+{
+    int rc;
+
+    LOGD(DEBUG, ev->domid, " ev %p, cmd '%s'", ev, cmd);
+
+    assert(ev->qmp_state == qmp_state_disconnected ||
+           ev->qmp_state == qmp_state_connected);
+
+    /* Connect to QEMU if not already connected */
+    rc = qmp_ev_connect(gc, ev);
+    if (rc)
+        goto out;
+
+    rc = qmp_ev_prepare_cmd(gc, ev, cmd, args);
+    if (rc)
+        goto out;
+
+    qmp_ev_ensure_reading_writing(gc, ev);
+
+out:
+    if (rc)
+        libxl__ev_qmp_dispose(gc, ev);
+    return rc;
+}
+
+void libxl__ev_qmp_dispose(libxl__gc *gc, libxl__ev_qmp *ev)
+    /* * -> disconnected */
+{
+    LOGD(DEBUG, ev->domid, " ev %p", ev);
+
+    free(ev->rx_buf);
+    free(ev->tx_buf);
+    free(ev->msg);
+
+    libxl__ev_fd_deregister(gc, &ev->qmp_efd);
+    libxl__carefd_close(ev->qmp_cfd);
+
+    libxl__ev_qmp_init(ev);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 3b8f967651..fec42b260c 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -69,6 +69,12 @@ libxl_error = Enumeration("error", [
     (-23, "NOTFOUND"),
     (-24, "DOMAIN_DESTROYED"), # Target domain ceased to exist during op
     (-25, "FEATURE_REMOVED"), # For functionality that has been removed
+    (-26, "PROTOCOL_ERROR_QMP"),
+    (-27, "UNKNOWN_QMP_ERROR"),
+    (-28, "QMP_GENERIC_ERROR"), # unspecified qmp error
+    (-29, "QMP_COMMAND_NOT_FOUND"), # the requested command has not been found
+    (-30, "QMP_DEVICE_NOT_ACTIVE"), # a device has failed to be become active
+    (-31, "QMP_DEVICE_NOT_FOUND"), # the requested device has not been found
     ], value_namespace = "")
 
 libxl_domain_type = Enumeration("domain_type", [
-- 
Anthony PERARD


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-13 12:28 [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_* Anthony PERARD
@ 2018-11-13 13:14 ` Ian Jackson
  2018-11-13 15:42   ` Anthony PERARD
  0 siblings, 1 reply; 15+ messages in thread
From: Ian Jackson @ 2018-11-13 13:14 UTC (permalink / raw)
  To: 23530.46744.511314.551654; +Cc: Anthony PERARD, xen-devel, Wei Liu

Anthony PERARD writes ("[PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
...

I think this was intended to satisfy my request for comments about
legal states:

> +/* helpers */
> +
> +static void qmp_ev_ensure_reading_writing(libxl__gc *gc, libxl__ev_qmp *ev)
> +{
> +    bool enable = false;

This one is probably an asisstant for transitioning between states so
the pre- and post-conditions may not be pure.  Whatever it is should
be documented...

> +static void qmp_ev_set_state(libxl__gc *gc, libxl__ev_qmp *ev,
> +                             libxl__qmp_state new_state)
> +{

This one at least does not need a comment :-).

> +static int qmp_ev_prepare_cmd(libxl__gc *gc,
> +                              libxl__ev_qmp *ev,
> +                              const char *cmd,
> +                              const libxl__json_object *args)
> +{
> +    char *buf = NULL;

Missing state comment.

> +static void qmp_ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev_fd,
> +                               int fd, short events, short revents)
> +{

Missing state comment, although I think the precondition can be easily
inferred from the state of ev_fd and the postcondition varies, but
would still be nice to discuss it.

> +static int qmp_ev_callback_writable(libxl__gc *gc,
> +                                    libxl__ev_qmp *ev, int fd)
> +    /* connected -> waiting_reply
> +     * the state isn't change otherwise. */
> +{

I don't know what `otherwise' means.  Maybe you mean all other states
are legal and remain unchanged ?  But that does not seem to be
likely.  Presumably disconnected is ruled out, at least.

> +static int qmp_ev_callback_readable(libxl__egc *egc,
> +                                    libxl__ev_qmp *ev, int fd)
> +    /* on error: * -> disconnected */

Precondition ?  And on non-error ?

> +static int qmp_ev_get_next_msg(libxl__egc *egc, libxl__ev_qmp *ev,
> +                               libxl__json_object **o_r)
> +    /* Find a JSON object and store it in o_r.
> +     * return ERROR_NOTFOUND if no object is found.
> +     * `o_r` is allocated within `egc`.
> +     */
> +{

Missing state comment.

> +static int qmp_ev_parse_error_messages(libxl__egc *egc,
> +                                       libxl__ev_qmp *ev,
> +                                       const libxl__json_object *resp)
> +{

This doesn't touch the state I think.  Should perhaps be mentioned in
a comment.

Thanks,
Ian.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-13 13:14 ` Ian Jackson
@ 2018-11-13 15:42   ` Anthony PERARD
  2018-11-13 16:10     ` Ian Jackson
  0 siblings, 1 reply; 15+ messages in thread
From: Anthony PERARD @ 2018-11-13 15:42 UTC (permalink / raw)
  To: Ian Jackson; +Cc: 23530.46744.511314.551654, xen-devel, Wei Liu

On Tue, Nov 13, 2018 at 01:14:30PM +0000, Ian Jackson wrote:
> Anthony PERARD writes ("[PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> ...
> 
> I think this was intended to satisfy my request for comments about
> legal states:
> 
> > +/* helpers */
> > +
> > +static void qmp_ev_ensure_reading_writing(libxl__gc *gc, libxl__ev_qmp *ev)
> > +{
> > +    bool enable = false;
> 
> This one is probably an asisstant for transitioning between states so
> the pre- and post-conditions may not be pure.  Whatever it is should
> be documented...

It's hard to document the state transition of a function that doesn't
care of the current state when the function is called, and will attempt
to figure out the current state to find out if a function `writable`
needs to be called later.

But now I realise that `disconnected` would be an illigal state.

What about:

  Precondition: !disconnected
  ensure that qmp_ev_callback_writable is been called when needed

> > +static void qmp_ev_set_state(libxl__gc *gc, libxl__ev_qmp *ev,
> > +                             libxl__qmp_state new_state)
> > +{
> 
> This one at least does not need a comment :-).
> 
> > +static int qmp_ev_prepare_cmd(libxl__gc *gc,
> > +                              libxl__ev_qmp *ev,
> > +                              const char *cmd,
> > +                              const libxl__json_object *args)
> > +{
> > +    char *buf = NULL;
> 
> Missing state comment.

Maybe:

  Precondition: connecting/connected

> > +static void qmp_ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev_fd,
> > +                               int fd, short events, short revents)
> > +{
> 
> Missing state comment, although I think the precondition can be easily
> inferred from the state of ev_fd and the postcondition varies, but
> would still be nice to discuss it.

This function is the main loop function, so almost everything happen in
this function. It should not be called directly. So I'm not sure what
kind of comment would be usefull here.

Maybe:
  Preconditions:
    `ev_fd` is Active
    this means that `ev` isn't disconnected
  Any allowed internal state transition can happen.
  A user callback may be called, when that happen, the function should
  return.

> > +static int qmp_ev_callback_writable(libxl__gc *gc,
> > +                                    libxl__ev_qmp *ev, int fd)
> > +    /* connected -> waiting_reply
> > +     * the state isn't change otherwise. */
> > +{
> 
> I don't know what `otherwise' means.  Maybe you mean all other states
> are legal and remain unchanged ?  But that does not seem to be
> likely.  Presumably disconnected is ruled out, at least.

If for some random reason this function is called with the state
disconnected, it would just return. Unless the state is disconnecting
and tx_buf haven't been cleared yet.

`Otherwise` would be the `otherwise` of haskell, or the `default` of a
switch case in C.

So a different comment could be:
  Precondition:
    !disconnected
  State transition
    connected -> waiting_reply
    * -> state unchange
    on error: disconnected
  The state of the transmiting buffer will be changed.



> > +static int qmp_ev_callback_readable(libxl__egc *egc,
> > +                                    libxl__ev_qmp *ev, int fd)
> > +    /* on error: * -> disconnected */
> 
> Precondition ?  And on non-error ?

Here is a more complete comment:
  Precondition:
    !disconnected
  State transition:
    Only the state of the receiving buffer is change on success
    on error: disconnected

> > +static int qmp_ev_get_next_msg(libxl__egc *egc, libxl__ev_qmp *ev,
> > +                               libxl__json_object **o_r)
> > +    /* Find a JSON object and store it in o_r.
> > +     * return ERROR_NOTFOUND if no object is found.
> > +     * `o_r` is allocated within `egc`.
> > +     */
> > +{
> 
> Missing state comment.

  precondition: !disconnected
  state of the receiving buffer can be changed.

> > +static int qmp_ev_parse_error_messages(libxl__egc *egc,
> > +                                       libxl__ev_qmp *ev,
> > +                                       const libxl__json_object *resp)
> > +{
> 
> This doesn't touch the state I think.  Should perhaps be mentioned in
> a comment.

The only thing that this function use is set by a user (of
libxl__ev_qmp): ev->domid. But I guess that comment would do:

  no state change


Are all those comments good enough? Also sometime the internal state
isn't fully changed in one go, and the transition could happen in
several functions (I think). Do we needs states like disconnecting,
connectinging, ... ? with a comment that say that the value of the
internal variables can be one of before or after the state transition.

Next time I'll write one BIG function, and there will be less of those
comments to write :).

Thanks,

-- 
Anthony PERARD

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-13 15:42   ` Anthony PERARD
@ 2018-11-13 16:10     ` Ian Jackson
  2018-11-14 11:40       ` Anthony PERARD
  0 siblings, 1 reply; 15+ messages in thread
From: Ian Jackson @ 2018-11-13 16:10 UTC (permalink / raw)
  To: Anthony PERARD; +Cc: 23530.46744.511314.551654, Ian Jackson, Wei Liu, xen-devel

Anthony PERARD writes ("Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> On Tue, Nov 13, 2018 at 01:14:30PM +0000, Ian Jackson wrote:
> > This one is probably an asisstant for transitioning between states so
> > the pre- and post-conditions may not be pure.  Whatever it is should
> > be documented...
> 
> It's hard to document the state transition of a function that doesn't
> care of the current state when the function is called, and will attempt
> to figure out the current state to find out if a function `writable`
> needs to be called later.

Then write that down rather than just saying nothing!


I think it might be useful to paste in again a thing I wrote on the
15th of October:

  Again, I asked for more internal documentation about the legal states
  etc.  I will have to read it in detail again I'm afraid after that is
  done.

  The reason I ask for this is that this is a complicated and
  substantial pile of code.  It's not sensible for anyone to try to hold
  it in their head at once - we will make mistakes.  And it should be
  possbile to modify it without reading all of it.

  So, it should be possible for anyone to:

   * Look only at the summary internal architecture state machine
     comments and so on, and confirm to themselves that this is a
     sensible design and that all the possible states are represented
     and that the interlocking states of the detailed variables are
     both sufficient, and of manageable complexity.

   * Read any small fragment of code and see that transforms legal
     states into other legal states, by reference to the above.

  Right now this is not really possible.  I look at things like this:

I want someone who reads one of these functions to be able to see that
it is correct, without having to go and look at its call sites to
understand what it needs to promise.  And of course from my own point
of view, I won't want to reverse engineer the intention from the code.
It's OK if it is obvious but in this case I think it is usually
un-obvious.

These kind of explicitly coded state machines are indeed tiresome,
compared to synchronous code where the state is encoded in the program
counter (ie, the progress through the single function).  It more easy
to make mistakes where a variable is not set or has the wrong value,
because (compared to a synchronous function) the sequencing is
obscured.

Unfortunately we do not have a workable coroutines system, and
multithreading is even worse for writing bugs.  (Nontrivial
multithreaded programs are almost always full of race bugs.)  So we
are left with doing it this way, and we have to mitigate the risk of
bugs a different way: in this case, by writing down more explicitly in
comments things that could be easily inferred from reading the code if
the code were a single function.


One way to test to see if you have succeeded is as follows: can you
prove that each function is correct, in isolation, without having to
read the body of any other function ?  You are only allowed to rely on
the struct definition, and the formal state definitions, and the doc
comments for your function and everything you call.

That is obviously not possible if there is nothing saying what state
the function is supposed to leave things in.  Nor is it possible to
show that the function is correct unless either it tolerates any (even
impure) input state, or there is a comment saying what input states
it is supposed to deal with.


> Are all those comments good enough? Also sometime the internal state
> isn't fully changed in one go, and the transition could happen in
> several functions (I think). Do we needs states like disconnecting,
> connectinging, ... ? with a comment that say that the value of the
> internal variables can be one of before or after the state transition.

This is what I meant by `impure' states below.  I don't think you want
to write them in your state table, but you can describe them in
comments as `connected, but with rx_fd not Active' or some such.
`But' and `but maybe' are good phrasings for this.


> But now I realise that `disconnected` would be an illigal state.
> 
> What about:
> 
>   Precondition: !disconnected
>   ensure that qmp_ev_callback_writable is been called when needed

That sounds good, but I think you probably want something more like:

   On entry: connected/..., but with [the ev_fd] maybe Idle
   On return: the same state, but with [the ev_fd] Active iff needed

?  Or whatever else is true.

> > > +static int qmp_ev_prepare_cmd(libxl__gc *gc,
> > > +                              libxl__ev_qmp *ev,
> > > +                              const char *cmd,
> > > +                              const libxl__json_object *args)
> > > +{
> > > +    char *buf = NULL;
> > 
> > Missing state comment.
> 
> Maybe:
> 
>   Precondition: connecting/connected

Does it change the state to a new one ?  Are the old and new states
pure states as described in the state table, or are they
half-transitioned ?  (More on this below.)

> > > +static void qmp_ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev_fd,
> > > +                               int fd, short events, short revents)
> > > +{
> > 
> > Missing state comment, although I think the precondition can be easily
> > inferred from the state of ev_fd and the postcondition varies, but
> > would still be nice to discuss it.
> 
> This function is the main loop function, so almost everything happen in
> this function. It should not be called directly. So I'm not sure what
> kind of comment would be usefull here.

The purpose of the state comments is not just to allow verification
that call sites are legal.  It is also to allow verification that the
contents of the function is appropriate.

> Maybe:
>   Preconditions:
>     `ev_fd` is Active
>     this means that `ev` isn't disconnected
>   Any allowed internal state transition can happen.
>   A user callback may be called, when that happen, the function should
>   return.

You could also write something more discursive.  Maybe

    On entry, ev_fd is (of course) Active.  The ev_qmp may be in any
    state where this is permitted.  qmp_ev_fd_callback will do the
    work necessary to make progress, depending on the current state,
    and make the appropriate state transitions and callbacks.

?

> > > +static int qmp_ev_callback_writable(libxl__gc *gc,
> > > +                                    libxl__ev_qmp *ev, int fd)
> > > +    /* connected -> waiting_reply
> > > +     * the state isn't change otherwise. */
> > > +{
> > 
> > I don't know what `otherwise' means.  Maybe you mean all other states
> > are legal and remain unchanged ?  But that does not seem to be
> > likely.  Presumably disconnected is ruled out, at least.
> 
> If for some random reason this function is called with the state
> disconnected, it would just return. Unless the state is disconnecting
> and tx_buf haven't been cleared yet.
> 
> `Otherwise` would be the `otherwise` of haskell, or the `default` of a
> switch case in C.
> 
> So a different comment could be:
>   Precondition:
>     !disconnected

You are contradicting yourself.  You are both stating that this
function may be called in state disconnected, and that it may not.

But assuming that what you write here in your proposed comment is
true, ...

>   State transition
>     connected -> waiting_reply
>     * -> state unchange
>     on error: disconnected
>   The state of the transmiting buffer will be changed.

... this looks good.

> > > +static int qmp_ev_callback_readable(libxl__egc *egc,
> > > +                                    libxl__ev_qmp *ev, int fd)
> > > +    /* on error: * -> disconnected */
> > 
> > Precondition ?  And on non-error ?
> 
> Here is a more complete comment:
>   Precondition:
>     !disconnected
>   State transition:
>     Only the state of the receiving buffer is change on success
>     on error: disconnected

That's good.  I would have been happy with:

     !disconnected -> same state (with rx buffer updated)
     on error -> disconnected


> > > +static int qmp_ev_parse_error_messages(libxl__egc *egc,
> > > +                                       libxl__ev_qmp *ev,
> > > +                                       const libxl__json_object *resp)
> > > +{
> > 
> > This doesn't touch the state I think.  Should perhaps be mentioned in
> > a comment.
> 
> The only thing that this function use is set by a user (of
> libxl__ev_qmp): ev->domid. But I guess that comment would do:
> 
>   no state change

Yes.


> Next time I'll write one BIG function, and there will be less of those
> comments to write :).

Hah.


Thanks,
Ian.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-13 16:10     ` Ian Jackson
@ 2018-11-14 11:40       ` Anthony PERARD
  2018-11-14 15:43         ` Ian Jackson
  0 siblings, 1 reply; 15+ messages in thread
From: Anthony PERARD @ 2018-11-14 11:40 UTC (permalink / raw)
  To: Ian Jackson; +Cc: 23530.46744.511314.551654, xen-devel, Wei Liu

On Tue, Nov 13, 2018 at 04:10:21PM +0000, Ian Jackson wrote:
> Anthony PERARD writes ("Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> > On Tue, Nov 13, 2018 at 01:14:30PM +0000, Ian Jackson wrote:
> > But now I realise that `disconnected` would be an illigal state.
> > 
> > What about:
> > 
> >   Precondition: !disconnected
> >   ensure that qmp_ev_callback_writable is been called when needed
> 
> That sounds good, but I think you probably want something more like:
> 
>    On entry: connected/..., but with [the ev_fd] maybe Idle
>    On return: the same state, but with [the ev_fd] Active iff needed
> 
> ?  Or whatever else is true.

Here is a simpler comment that is true:
  !disconnected -> same state

Even if in this function ev_fd is modified, it needs to be Active on
entry, and stay Active on return. But just saying that qmp_state needs
to be different than disconnected is enough.

> > > > +static int qmp_ev_prepare_cmd(libxl__gc *gc,
> > > > +                              libxl__ev_qmp *ev,
> > > > +                              const char *cmd,
> > > > +                              const libxl__json_object *args)
> > > > +{
> > > > +    char *buf = NULL;
> > > 
> > > Missing state comment.
> > 
> > Maybe:
> > 
> >   Precondition: connecting/connected
> 
> Does it change the state to a new one ?  Are the old and new states
> pure states as described in the state table, or are they
> half-transitioned ?  (More on this below.)

They are half-transitioned, here is a new comments:

    on entry: connecting/connected but with `msg` free
    on return: same state but with `msg` set

> > > > +static void qmp_ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev_fd,
> > > > +                               int fd, short events, short revents)
> > > > +{
> > > 
> > > Missing state comment, although I think the precondition can be easily
> > > inferred from the state of ev_fd and the postcondition varies, but
> > > would still be nice to discuss it.
> > 
> > This function is the main loop function, so almost everything happen in
> > this function. It should not be called directly. So I'm not sure what
> > kind of comment would be usefull here.
> 
> The purpose of the state comments is not just to allow verification
> that call sites are legal.  It is also to allow verification that the
> contents of the function is appropriate.
>
> > Maybe:
> >   Preconditions:
> >     `ev_fd` is Active
> >     this means that `ev` isn't disconnected
> >   Any allowed internal state transition can happen.
> >   A user callback may be called, when that happen, the function should
> >   return.
> 
> You could also write something more discursive.  Maybe
> 
>     On entry, ev_fd is (of course) Active.  The ev_qmp may be in any
>     state where this is permitted.  qmp_ev_fd_callback will do the
>     work necessary to make progress, depending on the current state,
>     and make the appropriate state transitions and callbacks.
> 
> ?

That sounds fine. Thanks.

> > > > +static int qmp_ev_callback_writable(libxl__gc *gc,
> > > > +                                    libxl__ev_qmp *ev, int fd)
> > > > +    /* connected -> waiting_reply
> > > > +     * the state isn't change otherwise. */
> > > > +{
> > > 
> > > I don't know what `otherwise' means.  Maybe you mean all other states
> > > are legal and remain unchanged ?  But that does not seem to be
> > > likely.  Presumably disconnected is ruled out, at least.
> > 
> > If for some random reason this function is called with the state
> > disconnected, it would just return. Unless the state is disconnecting
> > and tx_buf haven't been cleared yet.
> > 
> > `Otherwise` would be the `otherwise` of haskell, or the `default` of a
> > switch case in C.
> > 
> > So a different comment could be:
> >   Precondition:
> >     !disconnected
> 
> You are contradicting yourself.  You are both stating that this
> function may be called in state disconnected, and that it may not.
> 
> But assuming that what you write here in your proposed comment is
> true, ...
> 
> >   State transition
> >     connected -> waiting_reply
> >     * -> state unchange
> >     on error: disconnected
> >   The state of the transmiting buffer will be changed.
> 
> ... this looks good.

Do I need to say here and everywhere else that on error the new state
isn't really disconnected, and that the ev_qmp needs to be cleaned?
On one hand, saying that the new state is disconnected means that the
ev_qmp functions that only deal with !disconnected, but on the other,
the caller still need to call _dispose.

> > > > +static int qmp_ev_callback_readable(libxl__egc *egc,
> > > > +                                    libxl__ev_qmp *ev, int fd)
> > > > +    /* on error: * -> disconnected */
> > > 
> > > Precondition ?  And on non-error ?
> > 
> > Here is a more complete comment:
> >   Precondition:
> >     !disconnected
> >   State transition:
> >     Only the state of the receiving buffer is change on success
> >     on error: disconnected
> 
> That's good.  I would have been happy with:
> 
>      !disconnected -> same state (with rx buffer updated)
>      on error -> disconnected

I'll go with that, that looks better and easier to read.

Thanks,

-- 
Anthony PERARD

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-14 11:40       ` Anthony PERARD
@ 2018-11-14 15:43         ` Ian Jackson
  2018-11-14 15:49           ` Ian Jackson
  2018-11-14 16:33           ` Anthony PERARD
  0 siblings, 2 replies; 15+ messages in thread
From: Ian Jackson @ 2018-11-14 15:43 UTC (permalink / raw)
  To: Anthony PERARD; +Cc: xen-devel, Wei Liu

(I finally remembered to drop the message-id from the CC header...)

Anthony PERARD writes ("Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> Here is a simpler comment that is true:
>   !disconnected -> same state
> 
> Even if in this function ev_fd is modified, it needs to be Active on
> entry, and stay Active on return. But just saying that qmp_state needs
> to be different than disconnected is enough.

LGTM.

> > Does it change the state to a new one ?  Are the old and new states
> > pure states as described in the state table, or are they
> > half-transitioned ?  (More on this below.)
> 
> They are half-transitioned, here is a new comments:
> 
>     on entry: connecting/connected but with `msg` free
>     on return: same state but with `msg` set

LGTM.


> > But assuming that what you write here in your proposed comment is
> > true, ...
> > 
> > >   State transition
> > >     connected -> waiting_reply
> > >     * -> state unchange
> > >     on error: disconnected
> > >   The state of the transmiting buffer will be changed.
> > 
> > ... this looks good.
> 
> Do I need to say here and everywhere else that on error the new state
> isn't really disconnected, and that the ev_qmp needs to be cleaned?
> On one hand, saying that the new state is disconnected means that the
> ev_qmp functions that only deal with !disconnected, but on the other,
> the caller still need to call _dispose.

I think you are saying you can leave your ev_qmp in a state like
disconnected, but with things allocated.  Let us call that state
`broken' (in internal nomenclature).

But that state `broken' does not correspond to any of your public
states.  So your API document does not match the code.

You need to either change the API to document this state as a public
Broken state, or arrange for every ev_qmp-internal function to clean
it up appropriately before control passes outside the ev_qmp
implementation.

It is probably easier to introduce the new public Broken state.

As for the commentary: if you prefer not to document the `error ->
Broken' transition next to each function, you can write a general
statement in the internal design.  Eg, `for any internal function
which returns an error code, the state on error return will be Broken,
unless otherwise stated'.  Or something.

Thanks,
Ian.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-14 15:43         ` Ian Jackson
@ 2018-11-14 15:49           ` Ian Jackson
  2018-11-14 16:33           ` Anthony PERARD
  1 sibling, 0 replies; 15+ messages in thread
From: Ian Jackson @ 2018-11-14 15:49 UTC (permalink / raw)
  To: Anthony PERARD, xen-devel, Wei Liu

Ian Jackson writes ("Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> It is probably easier to introduce the new public Broken state.

If you do this you should think about what should happen if your next
layer up attempts to reuse a Broken libxl__ev_qmp for an unrelated
command.  I think some of the teardown after failed domain creation
might end up doing that.

Ian.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-14 15:43         ` Ian Jackson
  2018-11-14 15:49           ` Ian Jackson
@ 2018-11-14 16:33           ` Anthony PERARD
  2018-11-14 17:07             ` Ian Jackson
  1 sibling, 1 reply; 15+ messages in thread
From: Anthony PERARD @ 2018-11-14 16:33 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel, Wei Liu

On Wed, Nov 14, 2018 at 03:43:41PM +0000, Ian Jackson wrote:
> (I finally remembered to drop the message-id from the CC header...)

:), I kept forgeting as well.

> Anthony PERARD writes ("Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> > > But assuming that what you write here in your proposed comment is
> > > true, ...
> > > 
> > > >   State transition
> > > >     connected -> waiting_reply
> > > >     * -> state unchange
> > > >     on error: disconnected
> > > >   The state of the transmiting buffer will be changed.
> > > 
> > > ... this looks good.
> > 
> > Do I need to say here and everywhere else that on error the new state
> > isn't really disconnected, and that the ev_qmp needs to be cleaned?
> > On one hand, saying that the new state is disconnected means that the
> > ev_qmp functions that only deal with !disconnected, but on the other,
> > the caller still need to call _dispose.
> 
> I think you are saying you can leave your ev_qmp in a state like
> disconnected, but with things allocated.  Let us call that state
> `broken' (in internal nomenclature).
> 
> But that state `broken' does not correspond to any of your public
> states.  So your API document does not match the code.
> 
> You need to either change the API to document this state as a public
> Broken state, or arrange for every ev_qmp-internal function to clean
> it up appropriately before control passes outside the ev_qmp
> implementation.

The current implementation already cleanup the broken state before the
control is passed outside ev_qmp implementation. That result in the Idle
public state. Whenever an internal function throw an error, it lets the
main function qmp_ev_fd_callback taking care of cleanup the `broken` state
so that when the control passes outside ev_qmp implementation the state
is disconnected/Idle.

An internal `broken` state is just an half-transition toward the
disconnected/Idle state.

I guess I could add to the internal state documentation:

  When an internal function return an error, it can leave ev_qmp in a
  `broken` state but only if the caller is another internal function.
  That `broken` needs to be cleaned up, e.i. transitionned to the
  `disconnected` state, before the control of ev_qmp is released
  outsides of ev_qmp implementation.

Thanks,

-- 
Anthony PERARD

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-14 16:33           ` Anthony PERARD
@ 2018-11-14 17:07             ` Ian Jackson
  2018-11-15 11:15               ` [PATCH v6.2 " Anthony PERARD
  0 siblings, 1 reply; 15+ messages in thread
From: Ian Jackson @ 2018-11-14 17:07 UTC (permalink / raw)
  To: Anthony PERARD; +Cc: xen-devel, Wei Liu

Anthony PERARD writes ("Re: [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> The current implementation already cleanup the broken state before the
> control is passed outside ev_qmp implementation. That result in the Idle
> public state. Whenever an internal function throw an error, it lets the
> main function qmp_ev_fd_callback taking care of cleanup the `broken` state
> so that when the control passes outside ev_qmp implementation the state
> is disconnected/Idle.

Ah right.

> An internal `broken` state is just an half-transition toward the
> disconnected/Idle state.
> 
> I guess I could add to the internal state documentation:
> 
>   When an internal function return an error, it can leave ev_qmp in a
>   `broken` state but only if the caller is another internal function.
>   That `broken` needs to be cleaned up, e.i. transitionned to the
>   `disconnected` state, before the control of ev_qmp is released
>   outsides of ev_qmp implementation.

If you do this you need to describe what kinds of states are legal
`broken' states.  This is probably must easily done by making the
`broken' state a proper first-class internal state (giving it an entry
in the state table), but stating, as you do above, that it is not
visible externally because it's always tidied into `disconnected'
before control passes outside ev_qmp.

Ian.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v6.2 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-14 17:07             ` Ian Jackson
@ 2018-11-15 11:15               ` Anthony PERARD
  2018-11-15 18:40                 ` Ian Jackson
  0 siblings, 1 reply; 15+ messages in thread
From: Anthony PERARD @ 2018-11-15 11:15 UTC (permalink / raw)
  To: xen-devel; +Cc: Anthony PERARD, Wei Liu, Ian Jackson

Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>

---
v6.2:
    Add definition of the internal broken state
    updated comments about states
---
 tools/libxl/libxl_internal.h |  35 ++
 tools/libxl/libxl_qmp.c      | 708 +++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_types.idl  |   6 +
 3 files changed, 749 insertions(+)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 2b7e343775..fd84b60982 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -411,12 +411,47 @@ _hidden int libxl__ev_qmp_send(libxl__gc *gc, libxl__ev_qmp *ev,
                                const char *cmd, libxl__json_object *args);
 _hidden void libxl__ev_qmp_dispose(libxl__gc *gc, libxl__ev_qmp *ev);
 
+typedef enum {
+    /* initial state */
+    qmp_state_disconnected = 1,
+    /* connected to QMP socket, waiting for greeting message */
+    qmp_state_connecting,
+    /* qmp_capabilities command sent, waiting for reply */
+    qmp_state_capability_negotiation,
+    /* ready to send commands */
+    qmp_state_connected,
+    /* cmd sent, waiting for reply */
+    qmp_state_waiting_reply,
+} libxl__qmp_state;
+
 struct libxl__ev_qmp {
     /* caller should include this in their own struct */
     /* caller must fill these in, and they must all remain valid */
     libxl_domid domid;
     libxl__ev_qmp_callback *callback;
     int fd; /* set to send a fd with the command, -1 otherwise */
+
+    /*
+     * remaining fields are private to libxl_ev_qmp_*
+     */
+
+    int id;
+    libxl__carefd *qmp_cfd;
+    libxl__ev_fd qmp_efd;
+    libxl__qmp_state qmp_state;
+    /* receive buffer, with:
+     * rx_buf_size: current allocated size,
+     * rx_buf_used: actual data in the buffer */
+    char *rx_buf;
+    size_t rx_buf_size;
+    size_t rx_buf_used;
+    /* sending buffer */
+    char *tx_buf;
+    size_t tx_buf_len;
+    size_t tx_buf_off;
+    /* The message to send when ready */
+    char *msg;
+    size_t msg_len;
 };
 
 
diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c
index 218ee3ff09..a7138d204e 100644
--- a/tools/libxl/libxl_qmp.c
+++ b/tools/libxl/libxl_qmp.c
@@ -75,11 +75,18 @@
 #  define DEBUG_REPORT_RECEIVED(dom, buf, len) ((void)0)
 #endif
 
+#ifdef DEBUG_QMP_CLIENT
+#  define LOG_QMP(f, ...) LOGD(DEBUG, ev->domid, f, ##__VA_ARGS__)
+#else
+#  define LOG_QMP(f, ...)
+#endif
+
 /*
  * QMP types & constant
  */
 
 #define QMP_RECEIVE_BUFFER_SIZE 4096
+#define QMP_MAX_SIZE_RX_BUF MB(8)
 #define PCI_PT_QDEV_ID "pci-pt-%02x_%02x.%01x"
 
 /*
@@ -1312,6 +1319,707 @@ int libxl__qmp_initializations(libxl__gc *gc, uint32_t domid,
     return ret;
 }
 
+/* ------------ Implementation of libxl__ev_qmp ---------------- */
+
+/*
+ * Possible internal state compared to qmp_state:
+ *
+ * qmp_state      External  qmp_cfd  qmp_efd  id      rx_buf* tx_buf* msg*
+ * disconnected   Idle      close    Idle     reset   free    free    free
+ * connecting     Active    open     Active   set     used    free    set
+ * capability_negotiation
+ *                Active    open     Active   set     used    any     set
+ * connected      Connected open     Active   prev[1] used    free    any
+ * waiting_reply  Active    open     Active   set     used    any     free
+ * broken[2]      none[3]   any      Active   any     any     any     any
+ *
+ * [1] id used on the previously sent command
+ *
+ * [2] When an internal function return an error, it can leave ev_qmp in a
+ * `broken` state but only if the caller is another internal function.
+ * That `broken` needs to be cleaned up, e.i. transitionned to the
+ * `disconnected` state, before the control of ev_qmp is released outsides
+ * of ev_qmp implementation.
+ *
+ * [3] This internal state should not be visible externally, see [2].
+ *
+ * Possible buffers states:
+ * - receiving buffer:
+ *                    free   used
+ *   rx_buf           NULL   allocated
+ *   rx_buf_size      0      allocation size of `rx_buf`
+ *   rx_buf_off       0      <= rx_buf_size
+ * - transmiting buffer:
+ *                    free   used
+ *   tx_buf           NULL   contain data
+ *   tx_buf_len       0      size of data
+ *   tx_buf_off       0      <= tx_buf_len
+ * - queued user command:
+ *                    free  set
+ *   msg              NULL  contain data
+ *   msg_len          0     size of data
+ *
+ * - Allowed internal state transition:
+ * disconnected                         -> connecting
+ * connection                           -> capability_negotiation
+ * capability_negotiation/waiting_reply -> connected
+ * connected                            -> waiting_reply
+ * any                                  -> disconnected
+ */
+
+/* hard coded message ID used for capability negotiation ("xlq\0") */
+#define QMP_CAPABILITY_NEGOTIATION_MSGID 0x786c7100
+
+/* prototypes */
+
+static void qmp_ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev_fd,
+                               int fd, short events, short revents);
+static int qmp_ev_callback_writable(libxl__gc *gc,
+                                    libxl__ev_qmp *ev, int fd);
+static int qmp_ev_callback_readable(libxl__egc *egc,
+                                    libxl__ev_qmp *ev, int fd);
+static int qmp_ev_get_next_msg(libxl__egc *egc, libxl__ev_qmp *ev,
+                               libxl__json_object **o_r);
+static int qmp_ev_handle_message(libxl__egc *egc,
+                                 libxl__ev_qmp *ev,
+                                 const libxl__json_object *resp);
+
+/* helpers */
+
+static void qmp_ev_ensure_reading_writing(libxl__gc *gc, libxl__ev_qmp *ev)
+    /* !disconnected -> same state */
+{
+    bool enable = false;
+    short events;
+
+    if (ev->tx_buf) {
+        enable = true;
+    } else {
+        enable = (ev->qmp_state == qmp_state_connected) && ev->msg;
+    }
+
+    if (enable)
+        events = ev->qmp_efd.events | POLLOUT;
+    else
+        events = ev->qmp_efd.events & ~POLLOUT;
+
+    libxl__ev_fd_modify(gc, &ev->qmp_efd, events);
+}
+
+static void qmp_ev_set_state(libxl__gc *gc, libxl__ev_qmp *ev,
+                             libxl__qmp_state new_state)
+{
+    libxl__qmp_state cur = ev->qmp_state;
+    switch (new_state) {
+    case qmp_state_disconnected:
+        break;
+    case qmp_state_connecting:
+        assert(cur == qmp_state_disconnected);
+        break;
+    case qmp_state_capability_negotiation:
+        assert(cur == qmp_state_connecting);
+        break;
+    case qmp_state_connected:
+        assert(cur == qmp_state_capability_negotiation ||
+               cur == qmp_state_waiting_reply);
+        break;
+    case qmp_state_waiting_reply:
+        assert(cur == qmp_state_connected);
+        break;
+    }
+
+    ev->qmp_state = new_state;
+}
+
+static int qmp_error_class_to_libxl_error_code(libxl__gc *gc,
+                                               const char *eclass)
+{
+    const libxl_enum_string_table *t = libxl_error_string_table;
+
+    /* compare "QMP_GENERIC_ERROR" from libxl_error to "GenericError"
+     * generated by the QMP server */
+
+    for ( ; t->s; t++) {
+            const char *s = eclass;
+            const char *se = t->s;
+        if (strncasecmp(t->s, "QMP_", 4))
+            continue;
+
+        /* skip "QMP_" */
+        se += 4;
+        while (*s && *se) {
+            /* skip underscores */
+            if (*se == '_') {
+                se++;
+                continue;
+            }
+            if (tolower(*s) != tolower(*se))
+                break;
+            s++, se++;
+        }
+        if (!*s && !*se)
+            return t->v;
+    }
+
+    return ERROR_UNKNOWN_QMP_ERROR;
+}
+
+static int qmp_ev_prepare_cmd(libxl__gc *gc,
+                              libxl__ev_qmp *ev,
+                              const char *cmd,
+                              const libxl__json_object *args)
+    /* on entry: connecting/connected but with `msg` free
+     * on return: same state but with `msg` set */
+{
+    char *buf = NULL;
+    size_t len;
+
+    assert(!ev->tx_buf && !ev->tx_buf_len);
+    assert(!ev->msg && !ev->msg_len);
+
+    ev->id++;
+    buf = qmp_prepare_cmd(gc, cmd, args, ev->id, &len);
+    if (!buf) {
+        return ERROR_FAIL;
+    }
+
+    ev->msg = buf;
+    ev->msg_len = len;
+
+    return 0;
+}
+
+/* Setup connection */
+
+static int qmp_ev_connect(libxl__gc *gc, libxl__ev_qmp *ev)
+    /* disconnected -> connecting but with `msg` free
+     * on error: disconnected
+     * If the initial state isn't disconnected, then nothing is done */
+{
+    int fd;
+    int rc, r;
+    struct sockaddr_un un;
+    const char *qmp_socket_path;
+
+    if (ev->qmp_state != qmp_state_disconnected)
+        return 0;
+
+    qmp_socket_path = libxl__qemu_qmp_path(gc, ev->domid);
+
+    LOGD(DEBUG, ev->domid, "Connecting to %s", qmp_socket_path);
+
+    libxl__carefd_begin();
+    fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    ev->qmp_cfd = libxl__carefd_opened(CTX, fd);
+    if (!ev->qmp_cfd) {
+        LOGED(ERROR, ev->domid, "socket() failed");
+        rc = ERROR_FAIL;
+        goto out;
+    }
+    rc = libxl_fd_set_nonblock(CTX, libxl__carefd_fd(ev->qmp_cfd), 1);
+    if (rc)
+        goto out;
+
+    rc = libxl__prepare_sockaddr_un(gc, &un, qmp_socket_path,
+                                    "QMP socket");
+    if (rc)
+        goto out;
+
+    r = connect(libxl__carefd_fd(ev->qmp_cfd),
+                (struct sockaddr *) &un, sizeof(un));
+    if (r && errno != EINPROGRESS) {
+        LOGED(ERROR, ev->domid, "Failed to connect to QMP socket %s",
+              qmp_socket_path);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    rc = libxl__ev_fd_register(gc, &ev->qmp_efd, qmp_ev_fd_callback,
+                               libxl__carefd_fd(ev->qmp_cfd), POLLIN);
+    if (rc)
+        goto out;
+
+    qmp_ev_set_state(gc, ev, qmp_state_connecting);
+
+    return 0;
+
+out:
+    libxl__carefd_close(ev->qmp_cfd);
+    ev->qmp_cfd = NULL;
+    return rc;
+}
+
+/* QMP FD callbacks */
+
+static void qmp_ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev_fd,
+                               int fd, short events, short revents)
+    /* On entry, ev_fd is (of course) Active.  The ev_qmp may be in any
+     * state where this is permitted.  qmp_ev_fd_callback will do the work
+     * necessary to make progress, depending on the current state, and make
+     * the appropriate state transitions and callbacks.  */
+{
+    EGC_GC;
+    int rc;
+    libxl__json_object *o = NULL;
+    libxl__ev_qmp *ev = CONTAINER_OF(ev_fd, *ev, qmp_efd);
+
+    if (revents & (POLLHUP)) {
+        LOGD(ERROR, ev->domid, "received POLLHUP from QMP socket");
+        rc = ERROR_PROTOCOL_ERROR_QMP;
+        goto out;
+    }
+    if (revents & ~(POLLIN|POLLOUT)) {
+        LOGD(ERROR, ev->domid,
+             "unexpected poll event 0x%x on QMP socket (expected POLLIN "
+             "and/or POLLOUT)",
+            revents);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    if (revents & POLLOUT) {
+        rc = qmp_ev_callback_writable(gc, ev, fd);
+        if (rc)
+            goto out;
+    }
+
+    if (revents & POLLIN) {
+        rc = qmp_ev_callback_readable(egc, ev, fd);
+        if (rc)
+            goto out;
+
+        /* parse input */
+        while (1) {
+            /* parse rx buffer to find one json object */
+            rc = qmp_ev_get_next_msg(egc, ev, &o);
+            if (rc == ERROR_NOTFOUND) {
+                rc = 0;
+                break;
+            } else if (rc)
+                goto out;
+
+            /* Must be last and return when the user callback is called */
+            rc = qmp_ev_handle_message(egc, ev, o);
+            if (rc < 0)
+                goto out;
+            if (rc == 1) {
+                /* user callback has been called */
+                return;
+            }
+        }
+    }
+
+    qmp_ev_ensure_reading_writing(gc, ev);
+
+out:
+    if (rc) {
+        LOGD(ERROR, ev->domid,
+             "Error happend with the QMP connection to QEMU");
+
+        /* On error, deallocate all private ressources */
+        libxl__ev_qmp_dispose(gc, ev);
+
+        /* And tell libxl__ev_qmp user about the error */
+        ev->callback(egc, ev, NULL, rc); /* must be last */
+    }
+}
+
+static int qmp_ev_callback_writable(libxl__gc *gc,
+                                    libxl__ev_qmp *ev, int fd)
+    /* on entry: !disconnected
+     * on return, one of these state transition:
+     *   connected -> waiting_reply
+     *   * -> state unchange
+     *   but with the state of tx_buf changed
+     * on error: broken */
+{
+    int rc;
+    ssize_t r;
+
+    if (ev->qmp_state == qmp_state_connected) {
+        assert(!ev->tx_buf);
+        if (ev->msg) {
+            ev->tx_buf = ev->msg;
+            ev->tx_buf_len = ev->msg_len;
+            ev->tx_buf_off = 0;
+            ev->msg = NULL;
+            ev->msg_len = 0;
+            qmp_ev_set_state(gc, ev, qmp_state_waiting_reply);
+        }
+    }
+
+    if (!ev->tx_buf)
+        return 0;
+
+    LOG_QMP("sending: '%.*s'", (int)ev->tx_buf_len, ev->tx_buf);
+
+    /*
+     * We will send a file descriptor associated with a command on the
+     * first byte of this command.
+     */
+    if (ev->qmp_state == qmp_state_waiting_reply &&
+        ev->fd >= 0 &&
+        ev->tx_buf_off == 0) {
+
+        rc = libxl__sendmsg_fds(gc, fd, ev->tx_buf, 1,
+                                1, &ev->fd, "QMP socket");
+        /* Check for EWOULDBLOCK, and return to try again later */
+        if (rc == ERROR_NOT_READY)
+            return 0;
+        if (rc)
+            return rc;
+        ev->tx_buf_off++;
+    }
+
+    while (ev->tx_buf_off < ev->tx_buf_len) {
+        r = write(fd, ev->tx_buf + ev->tx_buf_off,
+                  ev->tx_buf_len - ev->tx_buf_off);
+        if (r < 0) {
+            if (errno == EINTR)
+                continue;
+            if (errno == EWOULDBLOCK)
+                break;
+            LOGED(ERROR, ev->domid, "failed to write to QMP socket");
+            return ERROR_FAIL;
+        }
+        ev->tx_buf_off += r;
+    }
+
+    if (ev->tx_buf_off == ev->tx_buf_len) {
+        free(ev->tx_buf);
+        ev->tx_buf = NULL;
+        ev->tx_buf_len = ev->tx_buf_off = 0;
+    }
+
+    return 0;
+}
+
+static int qmp_ev_callback_readable(libxl__egc *egc,
+                                    libxl__ev_qmp *ev, int fd)
+    /* !disconnected -> same state (with rx buffer updated)
+     * on error -> broken */
+{
+    EGC_GC;
+
+    while (1) {
+        ssize_t r;
+
+        /* Check if the buffer still have space, or increase size */
+        if (ev->rx_buf_size - ev->rx_buf_used < QMP_RECEIVE_BUFFER_SIZE) {
+            ev->rx_buf_size = max(ev->rx_buf_size * 2,
+                               (size_t)QMP_RECEIVE_BUFFER_SIZE * 2);
+            assert(ev->rx_buf_size <= QMP_MAX_SIZE_RX_BUF);
+            if (ev->rx_buf_size > QMP_MAX_SIZE_RX_BUF) {
+                LOGD(ERROR, ev->domid,
+                     "QMP receive buffer is too big (%ld > %lld)",
+                     ev->rx_buf_size, QMP_MAX_SIZE_RX_BUF);
+                return ERROR_BUFFERFULL;
+            }
+            ev->rx_buf = libxl__realloc(NOGC, ev->rx_buf, ev->rx_buf_size);
+        }
+
+        r = read(fd, ev->rx_buf + ev->rx_buf_used,
+                 ev->rx_buf_size - ev->rx_buf_used);
+        if (r < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+            if (errno == EWOULDBLOCK) {
+                break;
+            }
+            LOGED(ERROR, ev->domid, "error reading QMP socket");
+            return ERROR_FAIL;
+        }
+
+        if (r == 0) {
+            LOGD(ERROR, ev->domid, "Unexpected EOF on QMP socket");
+            return ERROR_PROTOCOL_ERROR_QMP;
+        }
+
+        LOG_QMP("received %ldB: '%.*s'", r,
+                (int)r, ev->rx_buf + ev->rx_buf_used);
+
+        ev->rx_buf_used += r;
+        assert(ev->rx_buf_used <= ev->rx_buf_size);
+    }
+
+    return 0;
+}
+
+/* Handle messages received from QMP server */
+
+static int qmp_ev_get_next_msg(libxl__egc *egc, libxl__ev_qmp *ev,
+                               libxl__json_object **o_r)
+    /* Find a JSON object and store it in o_r.
+     * return ERROR_NOTFOUND if no object is found.
+     * `o_r` is allocated within `egc`.
+     *
+     * !disconnected -> same state (with rx buffer updated)
+     */
+{
+    EGC_GC;
+    size_t len;
+    char *end = NULL;
+    libxl__json_object *o = NULL;
+
+    if (!ev->rx_buf_used)
+        return ERROR_NOTFOUND;
+
+    /* Search for the end of a QMP message: "\r\n" */
+    end = memmem(ev->rx_buf, ev->rx_buf_used, "\r\n", 2);
+    if (!end)
+        return ERROR_NOTFOUND;
+    len = (end - ev->rx_buf) + 2;
+
+    LOG_QMP("parsing %luB: '%.*s'", len, (int)len, ev->rx_buf);
+
+    /* Replace \r by \0 so that libxl__json_parse can use strlen */
+    ev->rx_buf[len - 2] = '\0';
+    o = libxl__json_parse(gc, ev->rx_buf);
+
+    if (!o) {
+        LOGD(ERROR, ev->domid, "Parse error");
+        return ERROR_PROTOCOL_ERROR_QMP;
+    }
+
+    ev->rx_buf_used -= len;
+    memmove(ev->rx_buf, ev->rx_buf + len, ev->rx_buf_used);
+
+    LOG_QMP("JSON object received: %s",
+            libxl__json_object_to_json(gc, o));
+
+    *o_r = o;
+
+    return 0;
+}
+
+static int qmp_ev_parse_error_messages(libxl__egc *egc,
+                                       libxl__ev_qmp *ev,
+                                       const libxl__json_object *resp)
+    /* no state change */
+{
+    EGC_GC;
+    int rc;
+    const char *s;
+    const libxl__json_object *o;
+    const libxl__json_object *err;
+
+    /*
+     * { "error": { "class": string, "desc": string } }
+     */
+
+    err = libxl__json_map_get("error", resp, JSON_MAP);
+
+    o = libxl__json_map_get("class", err, JSON_STRING);
+    if (!o) {
+        LOGD(ERROR, ev->domid,
+             "Protocol error: missing \"class\" member in error message");
+        return ERROR_PROTOCOL_ERROR_QMP;
+    }
+    s = libxl__json_object_get_string(o);
+    if (s)
+        rc = qmp_error_class_to_libxl_error_code(gc, s);
+    else
+        rc = ERROR_PROTOCOL_ERROR_QMP;
+
+    o = libxl__json_map_get("desc", err, JSON_STRING);
+    if (!o) {
+        LOGD(ERROR, ev->domid,
+             "Protocol error: missing \"desc\" member in error message");
+        return ERROR_PROTOCOL_ERROR_QMP;
+    }
+    s = libxl__json_object_get_string(o);
+    if (s)
+        LOGD(ERROR, ev->domid, "%s", s);
+    else
+        LOGD(ERROR, ev->domid, "Received unexpected error: %s",
+             libxl__json_object_to_json(gc, resp));
+    return rc;
+}
+
+static int qmp_ev_handle_message(libxl__egc *egc,
+                                 libxl__ev_qmp *ev,
+                                 const libxl__json_object *resp)
+    /*
+     * This function will handle every messages sent by the QMP server.
+     * Return values:
+     *   < 0    libxl error code
+     *   0      success
+     *   1      success, but a user callback has been called,
+     *          `ev` should not be used anymore.
+     *
+     * Possible state changes:
+     * connecting -> capability_negotiation
+     * capability_negotiation -> connected
+     * waiting_reply -> waiting_reply/connected
+     *
+     * on error: broken
+     */
+{
+    EGC_GC;
+    int id;
+    int rc;
+    const libxl__json_object *o;
+    const libxl__json_object *response;
+    libxl__qmp_message_type type = qmp_response_type(resp);
+
+    switch (type) {
+    case LIBXL__QMP_MESSAGE_TYPE_QMP:
+        /* greeting message */
+
+        if (ev->qmp_state != qmp_state_connecting) {
+            LOGD(ERROR, ev->domid,
+                 "Unexpected greeting message received");
+            return ERROR_PROTOCOL_ERROR_QMP;
+        }
+
+        /* Prepare next message to send */
+        assert(!ev->tx_buf);
+        ev->tx_buf = qmp_prepare_cmd(gc, "qmp_capabilities", NULL,
+                              QMP_CAPABILITY_NEGOTIATION_MSGID,
+                              &ev->tx_buf_len);
+        ev->tx_buf_off = 0;
+        qmp_ev_set_state(gc, ev, qmp_state_capability_negotiation);
+
+        return 0;
+
+    case LIBXL__QMP_MESSAGE_TYPE_RETURN:
+    case LIBXL__QMP_MESSAGE_TYPE_ERROR:
+        /*
+         * Reply to a command (success/error) or server error
+         *
+         * In this cases, we are parsing two possibles responses:
+         * - success:
+         * { "return": json-value, "id": int }
+         * - error:
+         * { "error": { "class": string, "desc": string }, "id": int }
+         */
+
+        o = libxl__json_map_get("id", resp, JSON_INTEGER);
+        if (!o) {
+            /*
+             * If "id" isn't present, an error occur on the server before
+             * it has read the "id" provided by libxl.
+             */
+            qmp_ev_parse_error_messages(egc, ev, resp);
+            return ERROR_PROTOCOL_ERROR_QMP;
+        }
+
+        id = libxl__json_object_get_integer(o);
+
+        if (id == QMP_CAPABILITY_NEGOTIATION_MSGID) {
+            /* We have a response to our qmp_capabilities cmd */
+            if (ev->qmp_state != qmp_state_capability_negotiation ||
+                type != LIBXL__QMP_MESSAGE_TYPE_RETURN)
+                goto out_unknown_id;
+            qmp_ev_set_state(gc, ev, qmp_state_connected);
+            return 0;
+        }
+
+        if (ev->qmp_state == qmp_state_waiting_reply &&
+            id == ev->id) {
+            if (type == LIBXL__QMP_MESSAGE_TYPE_RETURN) {
+                response = libxl__json_map_get("return", resp, JSON_ANY);
+                rc = 0;
+            } else {
+                /* error message */
+                response = NULL;
+                rc = qmp_ev_parse_error_messages(egc, ev, resp);
+            }
+            qmp_ev_set_state(gc, ev, qmp_state_connected);
+            ev->callback(egc, ev, response, rc); /* must be last */
+            return 1;
+        }
+
+out_unknown_id:
+        LOGD(ERROR, ev->domid,
+             "Message from QEMU with unexpected id %d: %s",
+             id, libxl__json_object_to_json(gc, resp));
+        return ERROR_PROTOCOL_ERROR_QMP;
+
+    case LIBXL__QMP_MESSAGE_TYPE_EVENT:
+        /* Events are ignored */
+        return 0;
+
+    case LIBXL__QMP_MESSAGE_TYPE_INVALID:
+        LOGD(ERROR, ev->domid, "Unexpected message received: %s",
+             libxl__json_object_to_json(gc, resp));
+        return ERROR_PROTOCOL_ERROR_QMP;
+
+    default:
+        abort();
+    }
+
+    return 0;
+}
+
+/*
+ * libxl__ev_qmp_*
+ */
+
+void libxl__ev_qmp_init(libxl__ev_qmp *ev)
+    /* disconnected -> disconnected */
+{
+    ev->id = QMP_CAPABILITY_NEGOTIATION_MSGID + 1;
+
+    ev->qmp_cfd = NULL;
+    libxl__ev_fd_init(&ev->qmp_efd);
+    ev->qmp_state = qmp_state_disconnected;
+
+    ev->rx_buf = NULL;
+    ev->rx_buf_size = ev->rx_buf_used = 0;
+    ev->tx_buf = NULL;
+    ev->tx_buf_len = ev->tx_buf_off = 0;
+
+    ev->msg = NULL;
+    ev->msg_len = 0;
+}
+
+int libxl__ev_qmp_send(libxl__gc *gc, libxl__ev_qmp *ev,
+                       const char *cmd, libxl__json_object *args)
+    /* disconnected -> connecting
+     * connected -> waiting_reply
+     * on error: disconnected */
+{
+    int rc;
+
+    LOGD(DEBUG, ev->domid, " ev %p, cmd '%s'", ev, cmd);
+
+    assert(ev->qmp_state == qmp_state_disconnected ||
+           ev->qmp_state == qmp_state_connected);
+
+    /* Connect to QEMU if not already connected */
+    rc = qmp_ev_connect(gc, ev);
+    if (rc)
+        goto out;
+
+    rc = qmp_ev_prepare_cmd(gc, ev, cmd, args);
+    if (rc)
+        goto out;
+
+    qmp_ev_ensure_reading_writing(gc, ev);
+
+out:
+    if (rc)
+        libxl__ev_qmp_dispose(gc, ev);
+    return rc;
+}
+
+void libxl__ev_qmp_dispose(libxl__gc *gc, libxl__ev_qmp *ev)
+    /* * -> disconnected */
+{
+    LOGD(DEBUG, ev->domid, " ev %p", ev);
+
+    free(ev->rx_buf);
+    free(ev->tx_buf);
+    free(ev->msg);
+
+    libxl__ev_fd_deregister(gc, &ev->qmp_efd);
+    libxl__carefd_close(ev->qmp_cfd);
+
+    libxl__ev_qmp_init(ev);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 3b8f967651..fec42b260c 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -69,6 +69,12 @@ libxl_error = Enumeration("error", [
     (-23, "NOTFOUND"),
     (-24, "DOMAIN_DESTROYED"), # Target domain ceased to exist during op
     (-25, "FEATURE_REMOVED"), # For functionality that has been removed
+    (-26, "PROTOCOL_ERROR_QMP"),
+    (-27, "UNKNOWN_QMP_ERROR"),
+    (-28, "QMP_GENERIC_ERROR"), # unspecified qmp error
+    (-29, "QMP_COMMAND_NOT_FOUND"), # the requested command has not been found
+    (-30, "QMP_DEVICE_NOT_ACTIVE"), # a device has failed to be become active
+    (-31, "QMP_DEVICE_NOT_FOUND"), # the requested device has not been found
     ], value_namespace = "")
 
 libxl_domain_type = Enumeration("domain_type", [
-- 
Anthony PERARD


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.2 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-15 11:15               ` [PATCH v6.2 " Anthony PERARD
@ 2018-11-15 18:40                 ` Ian Jackson
  2018-11-16 14:53                   ` Anthony PERARD
  0 siblings, 1 reply; 15+ messages in thread
From: Ian Jackson @ 2018-11-15 18:40 UTC (permalink / raw)
  To: Anthony PERARD; +Cc: xen-devel, Wei Liu

Anthony PERARD writes ("[PATCH v6.2 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>

Thanks for the additional comments.  I got thoroughly stuck in.

Overall the structure looks broadly right and my comments are
generally about details.  As might be expected, some of them are
stylistic or matters of taste.

Please feel free to push back if you disagree with me on anything.

>  struct libxl__ev_qmp {
>      /* caller should include this in their own struct */
>      /* caller must fill these in, and they must all remain valid */
>      libxl_domid domid;
>      libxl__ev_qmp_callback *callback;
>      int fd; /* set to send a fd with the command, -1 otherwise */
> +

This name `fd' becomes very confusing, because most of the time with
qmp the fd in question is our socket to qemu.  Can you rename it ?
payload_fd maybe ?

> +    /*
> +     * remaining fields are private to libxl_ev_qmp_*
> +     */
> +
> +    int id;
> +    libxl__carefd *qmp_cfd;
> +    libxl__ev_fd qmp_efd;
> +    libxl__qmp_state qmp_state;

What purpose do the qmp_ prefixes on these serve ?  I think if it were
me I would drop them and simply call these `cfd' and `efd' and `state'.


> +/* ------------ Implementation of libxl__ev_qmp ---------------- */
> +
> +/*
> + * Possible internal state compared to qmp_state:
> + *
> + * qmp_state      External  qmp_cfd  qmp_efd  id      rx_buf* tx_buf* msg*
> + * disconnected   Idle      close    Idle     reset   free    free    free
                               ^^^^^
YM `0' or `NULL'.

Maybe this table would be easier to read if the headings were offset
from the values.  Eg:
  + * qmp_state     External  qmp_cfd  qmp_efd  id      rx_buf* tx_buf* msg*
  + * disconnected   Idle      close    Idle     reset   free    free    free
?
Up to you.

> + * connecting     Active    open     Active   set     used    free    set

You say Active for the qmp_efd but I think it would be better to say
what you are waiting for ?  Looking at qmp_ev_connect I think
connecting has only POLLIN.  You could write IN, IN[|OUT], etc., maybe.

While reading the rest of the code I found that this was often a
critical piece of state you are managing, so I think some improvement
here is warranted.

> + * capability_negotiation
> + *                Active    open     Active   set     used    any     set

I would abbreviate this state name here, because it spoils the table.
`cap.nego.' ?  Or maybe you could just rename it to capab_nego or
something.

> + * connected      Connected open     Active   prev[1] used    free    any

What msg might there be in state `connected' ?  According to the
public interface there is no message yet.

In general the movement of the caller's intended qmp command from msg
to rx_buf through to qemu could perhaps do with some exposition
(commentary).

> + * waiting_reply  Active    open     Active   set     used    any     free

I am a bit confused about the semantics of tx_buf being free in state
waiting_reply.  Does that mean the caller's wanted command has been
sent ?  This seems like part of the same question as I just asked...

Maybe you want to split this into two rows:

  + * waiting_reply  Active    open     IN|OUT   set     used    user's  free
  + * waiting_reply  Active    open     IN       set     used    free    free

?

> + * broken[2]      none[3]   any      Active   any     any     any     any
...
> + * [1] id used on the previously sent command

It would be better if the id column stated something more useful than
`set'.  `next' maybe, where applicable ?

AIUI it is intended (important?) that ids should not be reused.

> + * Possible buffers states:
> + * - receiving buffer:
> + *                    free   used
> + *   rx_buf           NULL   allocated
> + *   rx_buf_size      0      allocation size of `rx_buf`
> + *   rx_buf_off       0      <= rx_buf_size
> + * - transmiting buffer:

Typo `transmitting'.

A minor point, but indenting the rx_* things here would avoid them
lining up with `transmitting buffer' and misleading the eye.

Better still, have you considered moving this table into the struct
itself ?  You could put the table to the RHS of the actual member
definitions.  That gives slightly fewer places to look, although it
would involve a cross-reference from this wider state description to
the field's state tables.

> + *                    free   used
> + *   tx_buf           NULL   contain data
> + *   tx_buf_len       0      size of data
> + *   tx_buf_off       0      <= tx_buf_len

You should explain the semantics of _off in both cases.  It points
into the buffer, but what is the meaning of the data (or the space)
before and after it ?

... oh, I see for rx_buf_used this is documented in the struct
itself.  But not for tx_buf_off.

Which leads me to say: the struct contains rx_buf_used but the comment
here talks about _off.

> + * - queued user command:
> + *                    free  set
> + *   msg              NULL  contain data

YM `contains' data, in both cases.

> + *   msg_len          0     size of data
> + *
> + * - Allowed internal state transition:
> + * disconnected                         -> connecting
> + * connection                           -> capability_negotiation
> + * capability_negotiation/waiting_reply -> connected
> + * connected                            -> waiting_reply
> + * any                                  -> disconnected

This does not mention the state `broken' and it should.

> +static void qmp_ev_ensure_reading_writing(libxl__gc *gc, libxl__ev_qmp *ev)
> +    /* !disconnected -> same state */

This is not really true.  This function is one to transform a
transient, partial, internal state, into the corresponding proper
internal state, by setting up the fd event.

This will become more obvious if you add the IN etc. to the efd column
in the internal state table.

> +    if (ev->tx_buf) {
> +        enable = true;
> +    } else {
> +        enable = (ev->qmp_state == qmp_state_connected) && ev->msg;
> +    }
> +
> +    if (enable)
> +        events = ev->qmp_efd.events | POLLOUT;
> +    else
> +        events = ev->qmp_efd.events & ~POLLOUT;
> +
> +    libxl__ev_fd_modify(gc, &ev->qmp_efd, events);

This function seems more complicated than it needs to be ?

AFAICT your code never clears POLLIN.  And this function always
unconditionally clears or sets POLLOUT.  So the desired events do not
in fact depend on qmp_efd.events ?

Which makes sense, because from this name I would expect this function
to figure out for itself exactly what events ought to be listened for.

I think you want something like
      events = POLLIN;
      if (some stuff) {
         events |= POLLOUT;
etc.

> +static void qmp_ev_set_state(libxl__gc *gc, libxl__ev_qmp *ev,
> +                             libxl__qmp_state new_state)
> +{

Now that you have `broken' you should probably mention that this
function is not used to enter the state `broken' (that is done
willy-nilly), nor the state `disconnected' (that is done in _init).

> +static int qmp_error_class_to_libxl_error_code(libxl__gc *gc,
> +                                               const char *eclass)
> +{
> +    const libxl_enum_string_table *t = libxl_error_string_table;
> +
> +    /* compare "QMP_GENERIC_ERROR" from libxl_error to "GenericError"
> +     * generated by the QMP server */
> +
> +    for ( ; t->s; t++) {
> +            const char *s = eclass;
> +            const char *se = t->s;
> +        if (strncasecmp(t->s, "QMP_", 4))
> +            continue;
> +
> +        /* skip "QMP_" */
> +        se += 4;

This code contains or implies the value `4' a total of 4 times: three
times in the code ("QMP_", 4, 4) and once in a comment.  How about
   const char skip[] = "QMP_";
   const size_t skipl = sizeof(skip)-1;
?

> +        while (*s && *se) {
> +            /* skip underscores */
> +            if (*se == '_') {
> +                se++;
> +                continue;
> +            }
> +            if (tolower(*s) != tolower(*se))
> +                break;
> +            s++, se++;
> +        }
> +        if (!*s && !*se)
> +            return t->v;

I confess I find this way of constructing this loop a bit confusing.
I would write this as a for(;;) loop and deal with each of the exit
conditions in the loop body.  But that is a matter of taste I think so
if you prefer it like this, fine.

> +    }
> +
> +    return ERROR_UNKNOWN_QMP_ERROR;

I think you probably wanted to log the original error string here so
that it does not become lost.

> +static int qmp_ev_prepare_cmd(libxl__gc *gc,
> +                              libxl__ev_qmp *ev,
> +                              const char *cmd,
> +                              const libxl__json_object *args)
> +    /* on entry: connecting/connected but with `msg` free
> +     * on return: same state but with `msg` set */
> +{
> +    char *buf = NULL;
> +    size_t len;
> +
> +    assert(!ev->tx_buf && !ev->tx_buf_len);
> +    assert(!ev->msg && !ev->msg_len);
> +
> +    ev->id++;
> +    buf = qmp_prepare_cmd(gc, cmd, args, ev->id, &len);
> +    if (!buf) {
> +        return ERROR_FAIL;
> +    }

There is no doc comment for qmp_prepare_cmd saying whether it logs on
error.  So it doesn't ?  In which case maybe you should do so here.

Alternatively if it can only really fail due to memory allocation
failure, maybe it should return void (and call libxl__alloc_failed
on allocation failure) instead ?

If you don't need to log I think you should drop the { } here.

See also my comments on the other patch, about len.

> +/* Setup connection */
> +
> +static int qmp_ev_connect(libxl__gc *gc, libxl__ev_qmp *ev)
> +    /* disconnected -> connecting but with `msg` free
> +     * on error: disconnected
> +     * If the initial state isn't disconnected, then nothing is done */

Maybe it be better for this function to return `broken' on error ?

Also, I think it is anomalous that this function handles `broken' as a
no-op and claims success.  I think that is not really what a caller
might expect.  Maybe you want to forbid `broken' (with a corresponding
assert) or to permit it (calling dispose before doing the rest of the
work).

Contrary to the state description, this function does not transition
rx_buf from free to used.  However, I think this would probably be
more easily remedied by changing the definition of `used' to permit
NULL/0/0.  You might want to use a different word to `used', `inuse'
perhaps ?

> +out:
> +    libxl__carefd_close(ev->qmp_cfd);
> +    ev->qmp_cfd = NULL;
> +    return rc;

If this function were permitted to leave the state `broken' on error,
this separate error code would not be needed.  Is there some reason
not to do that ?

> +static void qmp_ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev_fd,
> +                               int fd, short events, short revents)
> +    /* On entry, ev_fd is (of course) Active.  The ev_qmp may be in any
> +     * state where this is permitted.  qmp_ev_fd_callback will do the work
> +     * necessary to make progress, depending on the current state, and make
> +     * the appropriate state transitions and callbacks.  */
> +{
> +    EGC_GC;
> +    int rc;
> +    libxl__json_object *o = NULL;
> +    libxl__ev_qmp *ev = CONTAINER_OF(ev_fd, *ev, qmp_efd);
> +
> +    if (revents & (POLLHUP)) {
> +        LOGD(ERROR, ev->domid, "received POLLHUP from QMP socket");
> +        rc = ERROR_PROTOCOL_ERROR_QMP;
> +        goto out;

This approach, without more, loses the error code from connect().
I think you need to call getsockopt(fd, SOL_SOCKET, SO_ERROR, ...)
and log the errno value if there is one.
  http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_10_16

> +    if (revents & POLLOUT) {
> +        rc = qmp_ev_callback_writable(gc, ev, fd);
...
> +    if (revents & POLLIN) {
> +        rc = qmp_ev_callback_readable(egc, ev, fd);
> +        if (rc)
> +            goto out;
> +
> +        /* parse input */

I find it odd that this input parsing is not part of
qmp_ev_callback_readable.  What do you think about moving it there ?

> +    qmp_ev_ensure_reading_writing(gc, ev);
> +
> +out:
> +    if (rc) {
> +        LOGD(ERROR, ev->domid,
> +             "Error happend with the QMP connection to QEMU");

Typo `happened'.

> +static int qmp_ev_callback_writable(libxl__gc *gc,
> +                                    libxl__ev_qmp *ev, int fd)
> +    /* on entry: !disconnected
> +     * on return, one of these state transition:
> +     *   connected -> waiting_reply
> +     *   * -> state unchange
> +     *   but with the state of tx_buf changed
> +     * on error: broken */
> +{
> +    int rc;
> +    ssize_t r;
> +
> +    if (ev->qmp_state == qmp_state_connected) {
> +        assert(!ev->tx_buf);
> +        if (ev->msg) {

See my comments about the state `connected' and msg.

> +    /*
> +     * We will send a file descriptor associated with a command on the
> +     * first byte of this command.
> +     */
> +    if (ev->qmp_state == qmp_state_waiting_reply &&
> +        ev->fd >= 0 &&
> +        ev->tx_buf_off == 0) {

According to the doc comments, state waiting_reply might have tx_buf
free, in which case it would try to execute this code.  I don't think
that can be right.

> +    if (ev->tx_buf_off == ev->tx_buf_len) {
> +        free(ev->tx_buf);
> +        ev->tx_buf = NULL;
> +        ev->tx_buf_len = ev->tx_buf_off = 0;

This appears in _init too.  Maybe you want a qmp_ev_tx_buf_clear
function ?

> +static int qmp_ev_callback_readable(libxl__egc *egc,
> +                                    libxl__ev_qmp *ev, int fd)
> +    /* !disconnected -> same state (with rx buffer updated)
> +     * on error -> broken */
> +{
> +    EGC_GC;
> +
> +    while (1) {
> +        ssize_t r;
> +
> +        /* Check if the buffer still have space, or increase size */
> +        if (ev->rx_buf_size - ev->rx_buf_used < QMP_RECEIVE_BUFFER_SIZE) {
> +            ev->rx_buf_size = max(ev->rx_buf_size * 2,
> +                               (size_t)QMP_RECEIVE_BUFFER_SIZE * 2);

How about

  +            ev->rx_buf_size *= 2;
  +            ev->rx_buf_size += QMP_RECEIVE_BUFFER_SIZE;

or some such ?  That's easier to read than a conditional.

> +            assert(ev->rx_buf_size <= QMP_MAX_SIZE_RX_BUF);
> +            if (ev->rx_buf_size > QMP_MAX_SIZE_RX_BUF) {

I think the assert is redundant.

> +                LOGD(ERROR, ev->domid,
> +                     "QMP receive buffer is too big (%ld > %lld)",
> +                     ev->rx_buf_size, QMP_MAX_SIZE_RX_BUF);
> +                return ERROR_BUFFERFULL;
> +            }

Putting this error check between the change to rx_buf_size and the
change to rx_buf obscures the essential connection between both
changes, which must be indivisible.

It also results in a bug: if this error trips, rx_buf_size has been
updated but rx_buf has not.  This is a dangerously invalid state.

This function tries to read everything that qemu produces and calls
the whole thing a loss of qemu produces more than QMP_MAX_SIZE_RX_BUF.
But perhaps qemu producing more than QMP_MAX_SIZE_RX_BUF output might
not be the result of qemu going mad.  It might be the result of us
having ignored it for a while, and qemu filling a buffer with event
notifications which we might want to discard ?

Would it not be better to process messages as they arrive ?  Ie to put
the attempt to find valid messages here inside this loop ?

That might also allow the reduction of the maximum message size from
8Mby.  8Mby seems like quite a lot.

Of such a change would result in a possible return to the caller with
the qmp fd having pending data to read.  We would have to make sure to
actually call read again after sending a new command, in case the efd
is edge triggered.

> +        r = read(fd, ev->rx_buf + ev->rx_buf_used,
> +                 ev->rx_buf_size - ev->rx_buf_used);
> +        if (r < 0) {
> +            if (errno == EINTR) {
> +                continue;
> +            }
> +            if (errno == EWOULDBLOCK) {
> +                break;
> +            }

These { } around simple breaks, continues, etc. are strictly
unnecessary FYI.  Personally I find that less cluttered.  But they're
permitted by CODING_STYLE and you can keep them if you want them.


> +/* Handle messages received from QMP server */
> +
> +static int qmp_ev_get_next_msg(libxl__egc *egc, libxl__ev_qmp *ev,
> +                               libxl__json_object **o_r)
> +    /* Find a JSON object and store it in o_r.
> +     * return ERROR_NOTFOUND if no object is found.
> +     * `o_r` is allocated within `egc`.
> +     *
> +     * !disconnected -> same state (with rx buffer updated)
> +     */
> +{
...
> +    /* Search for the end of a QMP message: "\r\n" */
> +    end = memmem(ev->rx_buf, ev->rx_buf_used, "\r\n", 2);
> +    if (!end)
> +        return ERROR_NOTFOUND;
> +    len = (end - ev->rx_buf) + 2;

In:

  Subject: Re: [PATCH v5 03/15] libxl_qmp: Implement fd callback and read data [and 1 more messages]
  Date: Mon, 29 Oct 2018 17:31:59 +0000

I wrote:

  But I think you should treat only `\n' as the delimiter.  This will
  considerably simplify the buffer handling.  (You should check and trim
  the preceding `\r' before passing things to libxl__json_parse of
  course.)

But you don't seem to have done that, or replied ?

> +static int qmp_ev_handle_message(libxl__egc *egc,
> +                                 libxl__ev_qmp *ev,
> +                                 const libxl__json_object *resp)
> +    /*
> +     * This function will handle every messages sent by the QMP server.
> +     * Return values:
> +     *   < 0    libxl error code
> +     *   0      success
> +     *   1      success, but a user callback has been called,
> +     *          `ev` should not be used anymore.
...
> +    case LIBXL__QMP_MESSAGE_TYPE_QMP:
...
> +        assert(!ev->tx_buf);
> +        ev->tx_buf = qmp_prepare_cmd(gc, "qmp_capabilities", NULL,
> +                              QMP_CAPABILITY_NEGOTIATION_MSGID,
> +                              &ev->tx_buf_len);
> +        ev->tx_buf_off = 0;

Why do you not use qmp_ev_prepare_cmd for this ?

> +    case LIBXL__QMP_MESSAGE_TYPE_RETURN:
> +    case LIBXL__QMP_MESSAGE_TYPE_ERROR:
...
> +        o = libxl__json_map_get("id", resp, JSON_INTEGER);
> +        if (!o) {
> +            /*
> +             * If "id" isn't present, an error occur on the server before
> +             * it has read the "id" provided by libxl.
> +             */
> +            qmp_ev_parse_error_messages(egc, ev, resp);
> +            return ERROR_PROTOCOL_ERROR_QMP;

I think you wanted to return the error code from
qmp_ev_parse_error_messages ?

> +        id = libxl__json_object_get_integer(o);
> +
> +        if (id == QMP_CAPABILITY_NEGOTIATION_MSGID) {
> +            /* We have a response to our qmp_capabilities cmd */
> +            if (ev->qmp_state != qmp_state_capability_negotiation ||
> +                type != LIBXL__QMP_MESSAGE_TYPE_RETURN)
> +                goto out_unknown_id;
> +            qmp_ev_set_state(gc, ev, qmp_state_connected);
> +            return 0;
> +        }

I'm a bit puzzled about the capability negotation, its id, etc.:

* Why does QMP_CAPABILITY_NEGOTIATION_MSGID exist at all ?  You
  could just use a normal id, couldn't you ?  And you'd be able to
  tell from your own state that it was the right value.

* Why are we even doing capability negotation when we throw the answer
  away ?

* I think it would be better to check id == ev->id first, rather
  than in each of these branches.

> +        if (ev->qmp_state == qmp_state_waiting_reply &&
> +            id == ev->id) {
> +            if (type == LIBXL__QMP_MESSAGE_TYPE_RETURN) {
> +                response = libxl__json_map_get("return", resp, JSON_ANY);
> +                rc = 0;
> +            } else {
> +                /* error message */
> +                response = NULL;
> +                rc = qmp_ev_parse_error_messages(egc, ev, resp);
> +            }
> +            qmp_ev_set_state(gc, ev, qmp_state_connected);
> +            ev->callback(egc, ev, response, rc); /* must be last */
> +            return 1;
> +        }
> +
> +out_unknown_id:

We're just falling through here.  It seems weird to me.

I would expect this sequence of checks:

   1. Check id against our id; should be the same; if not, this
      is an out of sequence reply which is not possible, so
      call it a protocol error.

   2. Check if the thing is an error response; if it is, parse and
      return the error message.

   2. It must be a RETURN.  (Maybe add an assert here.)  Check our
      state, which should be capability_negotiation (in which case we
      go to the next thing) or waiting_reply (in which case we're
      done).  Otherwise it is a protocol error.

> +int libxl__ev_qmp_send(libxl__gc *gc, libxl__ev_qmp *ev,
> +                       const char *cmd, libxl__json_object *args)
> +    /* disconnected -> connecting
> +     * connected -> waiting_reply
> +     * on error: disconnected */
> +{
> +    int rc;
> +
> +    LOGD(DEBUG, ev->domid, " ev %p, cmd '%s'", ev, cmd);
> +
> +    assert(ev->qmp_state == qmp_state_disconnected ||
> +           ev->qmp_state == qmp_state_connected);
> +
> +    /* Connect to QEMU if not already connected */
> +    rc = qmp_ev_connect(gc, ev);
> +    if (rc)
> +        goto out;
> +
> +    rc = qmp_ev_prepare_cmd(gc, ev, cmd, args);
> +    if (rc)
> +        goto out;
> +
> +    qmp_ev_ensure_reading_writing(gc, ev);
> +
> +out:

Please add
       rc = 0;
before out;

But you might prefer just to
       return 0;
since there is nothing allocated in this function that ought to be
disposed (which is nice).


Thanks,
Ian.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.2 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-15 18:40                 ` Ian Jackson
@ 2018-11-16 14:53                   ` Anthony PERARD
  2018-11-16 16:09                     ` Ian Jackson
  0 siblings, 1 reply; 15+ messages in thread
From: Anthony PERARD @ 2018-11-16 14:53 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel, Wei Liu

On Thu, Nov 15, 2018 at 06:40:13PM +0000, Ian Jackson wrote:
> Anthony PERARD writes ("[PATCH v6.2 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> > Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
> 
> Thanks for the additional comments.  I got thoroughly stuck in.
> 
> Overall the structure looks broadly right and my comments are
> generally about details.  As might be expected, some of them are
> stylistic or matters of taste.
> 
> Please feel free to push back if you disagree with me on anything.
> 
> >  struct libxl__ev_qmp {
> >      /* caller should include this in their own struct */
> >      /* caller must fill these in, and they must all remain valid */
> >      libxl_domid domid;
> >      libxl__ev_qmp_callback *callback;
> >      int fd; /* set to send a fd with the command, -1 otherwise */
> > +
> 
> This name `fd' becomes very confusing, because most of the time with
> qmp the fd in question is our socket to qemu.  Can you rename it ?
> payload_fd maybe ?

payload_fd sound fine, I'll use that.

> > +    /*
> > +     * remaining fields are private to libxl_ev_qmp_*
> > +     */
> > +
> > +    int id;
> > +    libxl__carefd *qmp_cfd;
> > +    libxl__ev_fd qmp_efd;
> > +    libxl__qmp_state qmp_state;
> 
> What purpose do the qmp_ prefixes on these serve ?  I think if it were
> me I would drop them and simply call these `cfd' and `efd' and `state'.

Sounds good.

> 
> > +/* ------------ Implementation of libxl__ev_qmp ---------------- */
> > +
> > +/*
> > + * Possible internal state compared to qmp_state:
> > + *
> > + * qmp_state      External  qmp_cfd  qmp_efd  id      rx_buf* tx_buf* msg*
> > + * disconnected   Idle      close    Idle     reset   free    free    free
>                                ^^^^^
> YM `0' or `NULL'.

Yes.

> Maybe this table would be easier to read if the headings were offset
> from the values.  Eg:
>   + * qmp_state     External  qmp_cfd  qmp_efd  id      rx_buf* tx_buf* msg*
>   + * disconnected   Idle      close    Idle     reset   free    free    free
> ?
> Up to you.

I'll give a try.

> > + * connecting     Active    open     Active   set     used    free    set
> 
> You say Active for the qmp_efd but I think it would be better to say
> what you are waiting for ?  Looking at qmp_ev_connect I think
> connecting has only POLLIN.  You could write IN, IN[|OUT], etc., maybe.
> 
> While reading the rest of the code I found that this was often a
> critical piece of state you are managing, so I think some improvement
> here is warranted.

Indeed, it's important to not have POLLOUT all the time, or we just have
a busy loop.

> > + * capability_negotiation
> > + *                Active    open     Active   set     used    any     set
> 
> I would abbreviate this state name here, because it spoils the table.
> `cap.nego.' ?  Or maybe you could just rename it to capab_nego or
> something.

Will do. 75cols is barely enough to write all the states :).

> > + * connected      Connected open     Active   prev[1] used    free    any
> 
> What msg might there be in state `connected' ?  According to the
> public interface there is no message yet.
> 
> In general the movement of the caller's intended qmp command from msg
> to rx_buf through to qemu could perhaps do with some exposition
> (commentary).
> 
> > + * waiting_reply  Active    open     Active   set     used    any     free
> 
> I am a bit confused about the semantics of tx_buf being free in state
> waiting_reply.  Does that mean the caller's wanted command has been
> sent ?  This seems like part of the same question as I just asked...
> 
> Maybe you want to split this into two rows:
> 
>   + * waiting_reply  Active    open     IN|OUT   set     used    user's  free
>   + * waiting_reply  Active    open     IN       set     used    free    free
> 
> ?

Spliting some states in two rows sound good, I might do it in other
cases as well.

> > + * broken[2]      none[3]   any      Active   any     any     any     any
> ...
> > + * [1] id used on the previously sent command
> 
> It would be better if the id column stated something more useful than
> `set'.  `next' maybe, where applicable ?

When 'set', id is the id of the user command we intend to send or have
already sent and waiting for the associated reply. I'm not sure which
word or set of word would describe enough. I might need to add a comment
below the table.

> AIUI it is intended (important?) that ids should not be reused.

I think it's important, it can help detect mistakes. If we reuse the
same id, we might as well not have one.

I can also help to figure out if an error message is because a command
failed to execute or if the server itself had an error.

> > + * Possible buffers states:
> > + * - receiving buffer:
> > + *                    free   used
> > + *   rx_buf           NULL   allocated
> > + *   rx_buf_size      0      allocation size of `rx_buf`
> > + *   rx_buf_off       0      <= rx_buf_size
> > + * - transmiting buffer:
> 
> Typo `transmitting'.
> 
> A minor point, but indenting the rx_* things here would avoid them
> lining up with `transmitting buffer' and misleading the eye.
> 
> Better still, have you considered moving this table into the struct
> itself ?  You could put the table to the RHS of the actual member
> definitions.  That gives slightly fewer places to look, although it
> would involve a cross-reference from this wider state description to
> the field's state tables.

Indeed, it feels kind of wrong to have the description of the internal
states in two different places. There are some descriptions in the
struct, but those are probably not needed.

> > + *                    free   used
> > + *   tx_buf           NULL   contain data
> > + *   tx_buf_len       0      size of data
> > + *   tx_buf_off       0      <= tx_buf_len
> 
> You should explain the semantics of _off in both cases.  It points
> into the buffer, but what is the meaning of the data (or the space)
> before and after it ?
> 
> ... oh, I see for rx_buf_used this is documented in the struct
> itself.  But not for tx_buf_off.
> 
> Which leads me to say: the struct contains rx_buf_used but the comment
> here talks about _off.

:(, I need to fix that.

And here is the description:

rx_buf_used: actual data in the buffer

tx_buf_off: data already sent

> > + * - queued user command:
> > + *                    free  set
> > + *   msg              NULL  contain data
> 
> YM `contains' data, in both cases.
> 
> > + *   msg_len          0     size of data
> > + *
> > + * - Allowed internal state transition:
> > + * disconnected                         -> connecting
> > + * connection                           -> capability_negotiation
> > + * capability_negotiation/waiting_reply -> connected
> > + * connected                            -> waiting_reply
> > + * any                                  -> disconnected
> 
> This does not mention the state `broken' and it should.

Maybe:
  any -> broken

Or probably more accurate:
  !disconnected -> broken

> > +static void qmp_ev_ensure_reading_writing(libxl__gc *gc, libxl__ev_qmp *ev)
> > +    /* !disconnected -> same state */
> 
> This is not really true.  This function is one to transform a
> transient, partial, internal state, into the corresponding proper
> internal state, by setting up the fd event.
> 
> This will become more obvious if you add the IN etc. to the efd column
> in the internal state table.

Indeed, I'll fix the comment once the table have all the IN[|OUT].

> > +    if (ev->tx_buf) {
> > +        enable = true;
> > +    } else {
> > +        enable = (ev->qmp_state == qmp_state_connected) && ev->msg;
> > +    }
> > +
> > +    if (enable)
> > +        events = ev->qmp_efd.events | POLLOUT;
> > +    else
> > +        events = ev->qmp_efd.events & ~POLLOUT;
> > +
> > +    libxl__ev_fd_modify(gc, &ev->qmp_efd, events);
> 
> This function seems more complicated than it needs to be ?
> 
> AFAICT your code never clears POLLIN.  And this function always
> unconditionally clears or sets POLLOUT.  So the desired events do not
> in fact depend on qmp_efd.events ?
> 
> Which makes sense, because from this name I would expect this function
> to figure out for itself exactly what events ought to be listened for.
> 
> I think you want something like
>       events = POLLIN;
>       if (some stuff) {
>          events |= POLLOUT;
> etc.

I'll fix that.

> > +static void qmp_ev_set_state(libxl__gc *gc, libxl__ev_qmp *ev,
> > +                             libxl__qmp_state new_state)
> > +{
> 
> Now that you have `broken' you should probably mention that this
> function is not used to enter the state `broken' (that is done
> willy-nilly), nor the state `disconnected' (that is done in _init).
> 
> > +static int qmp_error_class_to_libxl_error_code(libxl__gc *gc,
> > +                                               const char *eclass)
> > +{
> > +    const libxl_enum_string_table *t = libxl_error_string_table;
> > +
> > +    /* compare "QMP_GENERIC_ERROR" from libxl_error to "GenericError"
> > +     * generated by the QMP server */
> > +
> > +    for ( ; t->s; t++) {
> > +            const char *s = eclass;
> > +            const char *se = t->s;
> > +        if (strncasecmp(t->s, "QMP_", 4))
> > +            continue;
> > +
> > +        /* skip "QMP_" */
> > +        se += 4;
> 
> This code contains or implies the value `4' a total of 4 times: three
> times in the code ("QMP_", 4, 4) and once in a comment.  How about
>    const char skip[] = "QMP_";
>    const size_t skipl = sizeof(skip)-1;
> ?

Sounds good, I'll use that.

> > +    }
> > +
> > +    return ERROR_UNKNOWN_QMP_ERROR;
> 
> I think you probably wanted to log the original error string here so
> that it does not become lost.

Right, I'll log it.

> > +static int qmp_ev_prepare_cmd(libxl__gc *gc,
> > +                              libxl__ev_qmp *ev,
> > +                              const char *cmd,
> > +                              const libxl__json_object *args)
> > +    /* on entry: connecting/connected but with `msg` free
> > +     * on return: same state but with `msg` set */
> > +{
> > +    char *buf = NULL;
> > +    size_t len;
> > +
> > +    assert(!ev->tx_buf && !ev->tx_buf_len);
> > +    assert(!ev->msg && !ev->msg_len);
> > +
> > +    ev->id++;
> > +    buf = qmp_prepare_cmd(gc, cmd, args, ev->id, &len);
> > +    if (!buf) {
> > +        return ERROR_FAIL;
> > +    }
> 
> There is no doc comment for qmp_prepare_cmd saying whether it logs on
> error.  So it doesn't ?  In which case maybe you should do so here.
> 
> Alternatively if it can only really fail due to memory allocation
> failure, maybe it should return void (and call libxl__alloc_failed
> on allocation failure) instead ?

So we can have the allocation been done by libxl's functions, as
yajl_gen_alloc can use other functions that malloc/free/realloc, that
would remove the allocation failure.

But then I think that qmp_prepare_cmd could fail and return null if
'cmd' or 'args' contains bad json objects, or a wrong string.

So we still needs to detect failure.

I log here as I can add the domid.

> If you don't need to log I think you should drop the { } here.
> 
> See also my comments on the other patch, about len.

I'll probably remove len and use strlen when needed.

> > +/* Setup connection */
> > +
> > +static int qmp_ev_connect(libxl__gc *gc, libxl__ev_qmp *ev)
> > +    /* disconnected -> connecting but with `msg` free
> > +     * on error: disconnected
> > +     * If the initial state isn't disconnected, then nothing is done */
> 
> Maybe it be better for this function to return `broken' on error ?

That not the case, but I could change to return broken on error, and let
the caller clean the state.

> Also, I think it is anomalous that this function handles `broken' as a
> no-op and claims success.  I think that is not really what a caller
> might expect.  Maybe you want to forbid `broken' (with a corresponding
> assert) or to permit it (calling dispose before doing the rest of the
> work).

I'm thinking to change the definition and only allow this function to be
called with the state disconnected, and have the caller check for the
current state.

> Contrary to the state description, this function does not transition
> rx_buf from free to used.  However, I think this would probably be
> more easily remedied by changing the definition of `used' to permit
> NULL/0/0.  You might want to use a different word to `used', `inuse'
> perhaps ?

Will do.

> > +out:
> > +    libxl__carefd_close(ev->qmp_cfd);
> > +    ev->qmp_cfd = NULL;
> > +    return rc;
> 
> If this function were permitted to leave the state `broken' on error,
> this separate error code would not be needed.  Is there some reason
> not to do that ?

Not really.

> > +static void qmp_ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev_fd,
> > +                               int fd, short events, short revents)
> > +    /* On entry, ev_fd is (of course) Active.  The ev_qmp may be in any
> > +     * state where this is permitted.  qmp_ev_fd_callback will do the work
> > +     * necessary to make progress, depending on the current state, and make
> > +     * the appropriate state transitions and callbacks.  */
> > +{
> > +    EGC_GC;
> > +    int rc;
> > +    libxl__json_object *o = NULL;
> > +    libxl__ev_qmp *ev = CONTAINER_OF(ev_fd, *ev, qmp_efd);
> > +
> > +    if (revents & (POLLHUP)) {
> > +        LOGD(ERROR, ev->domid, "received POLLHUP from QMP socket");
> > +        rc = ERROR_PROTOCOL_ERROR_QMP;
> > +        goto out;
> 
> This approach, without more, loses the error code from connect().
> I think you need to call getsockopt(fd, SOL_SOCKET, SO_ERROR, ...)
> and log the errno value if there is one.
>   http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_10_16

I'll have a look.

> > +    if (revents & POLLOUT) {
> > +        rc = qmp_ev_callback_writable(gc, ev, fd);
> ...
> > +    if (revents & POLLIN) {
> > +        rc = qmp_ev_callback_readable(egc, ev, fd);
> > +        if (rc)
> > +            goto out;
> > +
> > +        /* parse input */
> 
> I find it odd that this input parsing is not part of
> qmp_ev_callback_readable.  What do you think about moving it there ?

I wanted to reduce the number of function calls between when a user
callback is called and when ev_qmp's control passes outsite of ev_qmp's
implementation. So I moved qmp_ev_handle_message() call here.

> > +static int qmp_ev_callback_writable(libxl__gc *gc,
> > +                                    libxl__ev_qmp *ev, int fd)
> > +    /* on entry: !disconnected
> > +     * on return, one of these state transition:
> > +     *   connected -> waiting_reply
> > +     *   * -> state unchange
> > +     *   but with the state of tx_buf changed
> > +     * on error: broken */
> > +{
> > +    int rc;
> > +    ssize_t r;
> > +
> > +    if (ev->qmp_state == qmp_state_connected) {
> > +        assert(!ev->tx_buf);
> > +        if (ev->msg) {
> 
> See my comments about the state `connected' and msg.
> 
> > +    /*
> > +     * We will send a file descriptor associated with a command on the
> > +     * first byte of this command.
> > +     */
> > +    if (ev->qmp_state == qmp_state_waiting_reply &&
> > +        ev->fd >= 0 &&
> > +        ev->tx_buf_off == 0) {
> 
> According to the doc comments, state waiting_reply might have tx_buf
> free, in which case it would try to execute this code.  I don't think
> that can be right.

When tx_buf is free, that function returns earlier. So this code isn't
executed.

> > +    if (ev->tx_buf_off == ev->tx_buf_len) {
> > +        free(ev->tx_buf);
> > +        ev->tx_buf = NULL;
> > +        ev->tx_buf_len = ev->tx_buf_off = 0;
> 
> This appears in _init too.  Maybe you want a qmp_ev_tx_buf_clear
> function ?

Sounds good.

> > +static int qmp_ev_callback_readable(libxl__egc *egc,
> > +                                    libxl__ev_qmp *ev, int fd)
> > +    /* !disconnected -> same state (with rx buffer updated)
> > +     * on error -> broken */
> > +{
> > +    EGC_GC;
> > +
> > +    while (1) {
> > +        ssize_t r;
> > +
> > +        /* Check if the buffer still have space, or increase size */
> > +        if (ev->rx_buf_size - ev->rx_buf_used < QMP_RECEIVE_BUFFER_SIZE) {
> > +            ev->rx_buf_size = max(ev->rx_buf_size * 2,
> > +                               (size_t)QMP_RECEIVE_BUFFER_SIZE * 2);
> 
> How about
> 
>   +            ev->rx_buf_size *= 2;
>   +            ev->rx_buf_size += QMP_RECEIVE_BUFFER_SIZE;
> 
> or some such ?  That's easier to read than a conditional.

That looks fine.

> > +            assert(ev->rx_buf_size <= QMP_MAX_SIZE_RX_BUF);
> > +            if (ev->rx_buf_size > QMP_MAX_SIZE_RX_BUF) {
> 
> I think the assert is redundant.
> 
> > +                LOGD(ERROR, ev->domid,
> > +                     "QMP receive buffer is too big (%ld > %lld)",
> > +                     ev->rx_buf_size, QMP_MAX_SIZE_RX_BUF);
> > +                return ERROR_BUFFERFULL;
> > +            }
> 
> Putting this error check between the change to rx_buf_size and the
> change to rx_buf obscures the essential connection between both
> changes, which must be indivisible.
> 
> It also results in a bug: if this error trips, rx_buf_size has been
> updated but rx_buf has not.  This is a dangerously invalid state.
> 
> This function tries to read everything that qemu produces and calls
> the whole thing a loss of qemu produces more than QMP_MAX_SIZE_RX_BUF.
> But perhaps qemu producing more than QMP_MAX_SIZE_RX_BUF output might
> not be the result of qemu going mad.  It might be the result of us
> having ignored it for a while, and qemu filling a buffer with event
> notifications which we might want to discard ?
> 
> Would it not be better to process messages as they arrive ?  Ie to put
> the attempt to find valid messages here inside this loop ?

Maybe.

We don't even needs to wait for a valid message and start parsing. yajl
allow to parse partial json input. But I don't know if it is a good idea
to start doing that.

But in anycase, it is probably better to look for valide messages before
returning an error with "there's too much to process".

> That might also allow the reduction of the maximum message size from
> 8Mby.  8Mby seems like quite a lot.

I don't know how big a message can be. I can certainly try to start QEMU
with 256 cpus and check the size of query-cpu's response.

> Of such a change would result in a possible return to the caller with
> the qmp fd having pending data to read.  We would have to make sure to
> actually call read again after sending a new command, in case the efd
> is edge triggered.
> 
> > +/* Handle messages received from QMP server */
> > +
> > +static int qmp_ev_get_next_msg(libxl__egc *egc, libxl__ev_qmp *ev,
> > +                               libxl__json_object **o_r)
> > +    /* Find a JSON object and store it in o_r.
> > +     * return ERROR_NOTFOUND if no object is found.
> > +     * `o_r` is allocated within `egc`.
> > +     *
> > +     * !disconnected -> same state (with rx buffer updated)
> > +     */
> > +{
> ...
> > +    /* Search for the end of a QMP message: "\r\n" */
> > +    end = memmem(ev->rx_buf, ev->rx_buf_used, "\r\n", 2);
> > +    if (!end)
> > +        return ERROR_NOTFOUND;
> > +    len = (end - ev->rx_buf) + 2;
> 
> In:
> 
>   Subject: Re: [PATCH v5 03/15] libxl_qmp: Implement fd callback and read data [and 1 more messages]
>   Date: Mon, 29 Oct 2018 17:31:59 +0000
> 
> I wrote:
> 
>   But I think you should treat only `\n' as the delimiter.  This will
>   considerably simplify the buffer handling.  (You should check and trim
>   the preceding `\r' before passing things to libxl__json_parse of
>   course.)
> 
> But you don't seem to have done that, or replied ?

This is still simpler than what we had before, and if I look for only
'\n', I would need a loop in cases where \n isn't precedded by \r. I
also check the whole buffer rather than new data, so looking for \r and
check if it is followed by a \n is similar.

As for triming the \r, this is done before the libxl__json_parse call.

> > +static int qmp_ev_handle_message(libxl__egc *egc,
> > +                                 libxl__ev_qmp *ev,
> > +                                 const libxl__json_object *resp)
> > +    /*
> > +     * This function will handle every messages sent by the QMP server.
> > +     * Return values:
> > +     *   < 0    libxl error code
> > +     *   0      success
> > +     *   1      success, but a user callback has been called,
> > +     *          `ev` should not be used anymore.
> ...
> > +    case LIBXL__QMP_MESSAGE_TYPE_QMP:
> ...
> > +        assert(!ev->tx_buf);
> > +        ev->tx_buf = qmp_prepare_cmd(gc, "qmp_capabilities", NULL,
> > +                              QMP_CAPABILITY_NEGOTIATION_MSGID,
> > +                              &ev->tx_buf_len);
> > +        ev->tx_buf_off = 0;
> 
> Why do you not use qmp_ev_prepare_cmd for this ?

Because qmp_ev_prepare_cmd does more like increasing the id. This
qmp_capabilities is a special command that is part of the
initialisation. I need to know which id is used so that I can track it
later and found out when QEMU is ready to receive other commands.

We could call qmp_ev_prepare_cmd, qmp_ev_prepare_user_cmd.

> > +    case LIBXL__QMP_MESSAGE_TYPE_RETURN:
> > +    case LIBXL__QMP_MESSAGE_TYPE_ERROR:
> ...
> > +        o = libxl__json_map_get("id", resp, JSON_INTEGER);
> > +        if (!o) {
> > +            /*
> > +             * If "id" isn't present, an error occur on the server before
> > +             * it has read the "id" provided by libxl.
> > +             */
> > +            qmp_ev_parse_error_messages(egc, ev, resp);
> > +            return ERROR_PROTOCOL_ERROR_QMP;
> 
> I think you wanted to return the error code from
> qmp_ev_parse_error_messages ?

Yeah, but then I've added to the public documentation that ERROR_QMP_*
means that ev_qmp is still Connected.

But here, if 'id' isn't in the message, something has gone wrong and I
don't think there is any point trying to continue. This isn't supposed
to happen when everything else has been fine from libxl's point of view.

The error code returned here will be passed to the user's callback.

> > +        id = libxl__json_object_get_integer(o);
> > +
> > +        if (id == QMP_CAPABILITY_NEGOTIATION_MSGID) {
> > +            /* We have a response to our qmp_capabilities cmd */
> > +            if (ev->qmp_state != qmp_state_capability_negotiation ||
> > +                type != LIBXL__QMP_MESSAGE_TYPE_RETURN)
> > +                goto out_unknown_id;
> > +            qmp_ev_set_state(gc, ev, qmp_state_connected);
> > +            return 0;
> > +        }
> 
> I'm a bit puzzled about the capability negotation, its id, etc.:
> 
> * Why does QMP_CAPABILITY_NEGOTIATION_MSGID exist at all ?  You
>   could just use a normal id, couldn't you ?  And you'd be able to
>   tell from your own state that it was the right value.

But I would then needs to track two different ids, one for the
qmp_capabilities command, and one for the user's command.

> * Why are we even doing capability negotation when we throw the answer
>   away ?

It is part of the protocol.

And qmp_capabilities doesn't return anything (just an empty object).

> * I think it would be better to check id == ev->id first, rather
>   than in each of these branches.

I don't think I follow. id==ev->id is only checked once.

> > +        if (ev->qmp_state == qmp_state_waiting_reply &&
> > +            id == ev->id) {
> > +            if (type == LIBXL__QMP_MESSAGE_TYPE_RETURN) {
> > +                response = libxl__json_map_get("return", resp, JSON_ANY);
> > +                rc = 0;
> > +            } else {
> > +                /* error message */
> > +                response = NULL;
> > +                rc = qmp_ev_parse_error_messages(egc, ev, resp);
> > +            }
> > +            qmp_ev_set_state(gc, ev, qmp_state_connected);
> > +            ev->callback(egc, ev, response, rc); /* must be last */
> > +            return 1;
> > +        }
> > +
> > +out_unknown_id:
> 
> We're just falling through here.  It seems weird to me.
> 
> I would expect this sequence of checks:
> 
>    1. Check id against our id; should be the same; if not, this
>       is an out of sequence reply which is not possible, so
>       call it a protocol error.
> 
>    2. Check if the thing is an error response; if it is, parse and
>       return the error message.
> 
>    2. It must be a RETURN.  (Maybe add an assert here.)  Check our
>       state, which should be capability_negotiation (in which case we
>       go to the next thing) or waiting_reply (in which case we're
>       done).  Otherwise it is a protocol error.

I don't think all ERROR should be protocol error. To me, it is fine if a
command returns an error, then tell the user about it. Also, there is no
needs to tear down the connection in this case.

As for the sequence of checks, I can try to follow it, it might look
weird because there is two differents ids to check, which id is valide
depends on the current state.

> > +int libxl__ev_qmp_send(libxl__gc *gc, libxl__ev_qmp *ev,
> > +                       const char *cmd, libxl__json_object *args)
> > +    /* disconnected -> connecting
> > +     * connected -> waiting_reply
> > +     * on error: disconnected */
> > +{
> > +    int rc;
> > +
> > +    LOGD(DEBUG, ev->domid, " ev %p, cmd '%s'", ev, cmd);
> > +
> > +    assert(ev->qmp_state == qmp_state_disconnected ||
> > +           ev->qmp_state == qmp_state_connected);
> > +
> > +    /* Connect to QEMU if not already connected */
> > +    rc = qmp_ev_connect(gc, ev);
> > +    if (rc)
> > +        goto out;
> > +
> > +    rc = qmp_ev_prepare_cmd(gc, ev, cmd, args);
> > +    if (rc)
> > +        goto out;
> > +
> > +    qmp_ev_ensure_reading_writing(gc, ev);
> > +
> > +out:
> 
> Please add
>        rc = 0;
> before out;
> 
> But you might prefer just to
>        return 0;
> since there is nothing allocated in this function that ought to be
> disposed (which is nice).

Will do.

Thanks,

-- 
Anthony PERARD

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.2 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-16 14:53                   ` Anthony PERARD
@ 2018-11-16 16:09                     ` Ian Jackson
  2018-11-16 17:10                       ` Anthony PERARD
  0 siblings, 1 reply; 15+ messages in thread
From: Ian Jackson @ 2018-11-16 16:09 UTC (permalink / raw)
  To: Anthony PERARD; +Cc: xen-devel, Wei Liu

Thanks for the reply.  I have deleted all the parts where we are in
agreement...


Anthony PERARD writes ("Re: [PATCH v6.2 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> On Thu, Nov 15, 2018 at 06:40:13PM +0000, Ian Jackson wrote:
> > It would be better if the id column stated something more useful than
> > `set'.  `next' maybe, where applicable ?
> 
> When 'set', id is the id of the user command we intend to send or have
> already sent and waiting for the associated reply. I'm not sure which
> word or set of word would describe enough. I might need to add a comment
> below the table.

You could use `sent', `next', alongside your existing `prev' ?

> > Typo `transmitting'.
> > 
> > A minor point, but indenting the rx_* things here would avoid them
> > lining up with `transmitting buffer' and misleading the eye.
> > 
> > Better still, have you considered moving this table into the struct
> > itself ?  You could put the table to the RHS of the actual member
> > definitions.  That gives slightly fewer places to look, although it
> > would involve a cross-reference from this wider state description to
> > the field's state tables.
> 
> Indeed, it feels kind of wrong to have the description of the internal
> states in two different places. There are some descriptions in the
> struct, but those are probably not needed.

I don't think I saw any _redundant_ text, but I could be wrong.
Certainly the stuff about which parts of the buffer are used, etc.,
are good and want to be kept.

> > ... oh, I see for rx_buf_used this is documented in the struct
> > itself.  But not for tx_buf_off.
> > 
> > Which leads me to say: the struct contains rx_buf_used but the comment
> > here talks about _off.
> 
> :(, I need to fix that.
> 
> And here is the description:
> 
> rx_buf_used: actual data in the buffer
> tx_buf_off: data already sent

Right, I found that in the end.

> > > + * - Allowed internal state transition:
> > > + * disconnected                         -> connecting
> > > + * connection                           -> capability_negotiation
> > > + * capability_negotiation/waiting_reply -> connected
> > > + * connected                            -> waiting_reply
> > > + * any                                  -> disconnected
> > 
> > This does not mention the state `broken' and it should.
> 
> Maybe:
>   any -> broken

Fine by me.  You want to mention broken -> disconnected too :-).


> > > +static int qmp_ev_prepare_cmd(libxl__gc *gc,
> > > +                              libxl__ev_qmp *ev,
> > > +                              const char *cmd,
> > > +                              const libxl__json_object *args)
...
> > > +    ev->id++;
> > > +    buf = qmp_prepare_cmd(gc, cmd, args, ev->id, &len);
> > > +    if (!buf) {
> > > +        return ERROR_FAIL;
> > > +    }
> > 
> > There is no doc comment for qmp_prepare_cmd saying whether it logs on
> > error.  So it doesn't ?  In which case maybe you should do so here.
> > 
> > Alternatively if it can only really fail due to memory allocation
> > failure, maybe it should return void (and call libxl__alloc_failed
> > on allocation failure) instead ?
> 
> So we can have the allocation been done by libxl's functions, as
> yajl_gen_alloc can use other functions that malloc/free/realloc, that
> would remove the allocation failure.

Right.

> But then I think that qmp_prepare_cmd could fail and return null if
> 'cmd' or 'args' contains bad json objects, or a wrong string.
> 
> So we still needs to detect failure.

OK.

> I log here as I can add the domid.

I'm not sure what you mean.  Where do you log ?  Not here.


> > > +/* Setup connection */
> > > +
> > > +static int qmp_ev_connect(libxl__gc *gc, libxl__ev_qmp *ev)
> > > +    /* disconnected -> connecting but with `msg` free
> > > +     * on error: disconnected
> > > +     * If the initial state isn't disconnected, then nothing is done */
> > 
> > Maybe it be better for this function to return `broken' on error ?
> 
> That not the case, but I could change to return broken on error, and let
> the caller clean the state.

That's what I mean.  That might simplify this function and the caller
probably has the relevant cleanup path already.  (I haven't checked.)


> > Also, I think it is anomalous that this function handles `broken' as a
> > no-op and claims success.  I think that is not really what a caller
> > might expect.  Maybe you want to forbid `broken' (with a corresponding
> > assert) or to permit it (calling dispose before doing the rest of the
> > work).
> 
> I'm thinking to change the definition and only allow this function to be
> called with the state disconnected, and have the caller check for the
> current state.

That's another possibility.


> > > +out:
> > > +    libxl__carefd_close(ev->qmp_cfd);
> > > +    ev->qmp_cfd = NULL;
> > > +    return rc;
> > 
> > If this function were permitted to leave the state `broken' on error,
> > this separate error code would not be needed.  Is there some reason
> > not to do that ?
> 
> Not really.

OK then :-).

> > > +    if (revents & POLLOUT) {
> > > +        rc = qmp_ev_callback_writable(gc, ev, fd);
> > ...
> > > +    if (revents & POLLIN) {
> > > +        rc = qmp_ev_callback_readable(egc, ev, fd);
> > > +        if (rc)
> > > +            goto out;
> > > +
> > > +        /* parse input */
> > 
> > I find it odd that this input parsing is not part of
> > qmp_ev_callback_readable.  What do you think about moving it there ?
> 
> I wanted to reduce the number of function calls between when a user
> callback is called and when ev_qmp's control passes outsite of ev_qmp's
> implementation. So I moved qmp_ev_handle_message() call here.

Why did you want to reduce the number of function calls ?

> > > +    /*
> > > +     * We will send a file descriptor associated with a command on the
> > > +     * first byte of this command.
> > > +     */
> > > +    if (ev->qmp_state == qmp_state_waiting_reply &&
> > > +        ev->fd >= 0 &&
> > > +        ev->tx_buf_off == 0) {
> > 
> > According to the doc comments, state waiting_reply might have tx_buf
> > free, in which case it would try to execute this code.  I don't think
> > that can be right.
> 
> When tx_buf is free, that function returns earlier. So this code isn't
> executed.

Oh yes, so it does.  But why are we entering this callback with a free
tx_buf at all ?  If we just return, we are likely to get called again,
ie an infinite loop.  This can only occur as a result of a bug ?
In which case maybe it should be an assert ?

> > > +            assert(ev->rx_buf_size <= QMP_MAX_SIZE_RX_BUF);
> > > +            if (ev->rx_buf_size > QMP_MAX_SIZE_RX_BUF) {
> > 
> > I think the assert is redundant.
> > 
> > > +                LOGD(ERROR, ev->domid,
> > > +                     "QMP receive buffer is too big (%ld > %lld)",
> > > +                     ev->rx_buf_size, QMP_MAX_SIZE_RX_BUF);
> > > +                return ERROR_BUFFERFULL;
> > > +            }
> > 
> > Putting this error check between the change to rx_buf_size and the
> > change to rx_buf obscures the essential connection between both
> > changes, which must be indivisible.
> > 
> > It also results in a bug: if this error trips, rx_buf_size has been
> > updated but rx_buf has not.  This is a dangerously invalid state.
> > 
> > This function tries to read everything that qemu produces and calls
> > the whole thing a loss of qemu produces more than QMP_MAX_SIZE_RX_BUF.
> > But perhaps qemu producing more than QMP_MAX_SIZE_RX_BUF output might
> > not be the result of qemu going mad.  It might be the result of us
> > having ignored it for a while, and qemu filling a buffer with event
> > notifications which we might want to discard ?
> > 
> > Would it not be better to process messages as they arrive ?  Ie to put
> > the attempt to find valid messages here inside this loop ?
> 
> Maybe.
> 
> We don't even needs to wait for a valid message and start parsing. yajl
> allow to parse partial json input. But I don't know if it is a good idea
> to start doing that.

I think parsing partial json messages would be quite complex and not
worthwhile - because we expect to get short messages.

> But in anycase, it is probably better to look for valide messages before
> returning an error with "there's too much to process".

Right.

> > That might also allow the reduction of the maximum message size from
> > 8Mby.  8Mby seems like quite a lot.
> 
> I don't know how big a message can be. I can certainly try to start QEMU
> with 256 cpus and check the size of query-cpu's response.

That is probably worthwhile but I hope it will be nowhere near 8Mby...

> > I wrote:
> > 
> >   But I think you should treat only `\n' as the delimiter.  This will
> >   considerably simplify the buffer handling.  (You should check and trim
> >   the preceding `\r' before passing things to libxl__json_parse of
> >   course.)
> > 
> > But you don't seem to have done that, or replied ?
> 
> This is still simpler than what we had before, and if I look for only
> '\n', I would need a loop in cases where \n isn't precedded by \r. I
> also check the whole buffer rather than new data, so looking for \r and
> check if it is followed by a \n is similar.

I think a bare \n is not legal and should be treated as a protocol
error.  Don't you agree ?  Given that, you can search for \n, and if
it is not preceded by \r, call it an error.

But maybe you prefer it the way you have it.

> As for triming the \r, this is done before the libxl__json_parse call.

Right, sorry, my parenthetical comment was just saying that if you
change it to search for \n you do still need to strip the \r.

> > > +    case LIBXL__QMP_MESSAGE_TYPE_QMP:
> > ...
> > > +        assert(!ev->tx_buf);
> > > +        ev->tx_buf = qmp_prepare_cmd(gc, "qmp_capabilities", NULL,
> > > +                              QMP_CAPABILITY_NEGOTIATION_MSGID,
> > > +                              &ev->tx_buf_len);
> > > +        ev->tx_buf_off = 0;
> > 
> > Why do you not use qmp_ev_prepare_cmd for this ?
> 
> Because qmp_ev_prepare_cmd does more like increasing the id. This
> qmp_capabilities is a special command that is part of the
> initialisation. I need to know which id is used so that I can track it
> later and found out when QEMU is ready to receive other commands.

But see below about the id.  I think you should be using the normal
id.

In another thread you were talking about possibly trying to reuse an
existing session and putting the pid in the id or something.  In this
case a fixed id for the capabilities command is not good either.

Let me bring forward the part where we handle the resuls:

> > > +        id = libxl__json_object_get_integer(o);
> > > +
> > > +        if (id == QMP_CAPABILITY_NEGOTIATION_MSGID) {
> > > +            /* We have a response to our qmp_capabilities cmd */
> > > +            if (ev->qmp_state != qmp_state_capability_negotiation ||
> > > +                type != LIBXL__QMP_MESSAGE_TYPE_RETURN)
> > > +                goto out_unknown_id;
> > > +            qmp_ev_set_state(gc, ev, qmp_state_connected);
> > > +            return 0;
> > > +        }
> > 
> > I'm a bit puzzled about the capability negotation, its id, etc.:
> > 
> > * Why does QMP_CAPABILITY_NEGOTIATION_MSGID exist at all ?  You
> >   could just use a normal id, couldn't you ?  And you'd be able to
> >   tell from your own state that it was the right value.
> 
> But I would then needs to track two different ids, one for the
> qmp_capabilities command, and one for the user's command.

But you only construct the id when you prepare the command into
tx_buf.  So you only have one id at a time: the id of the most
recently prepared command.  Or am I wrong ?

...

I have looked at this code again and I was wrong.  You format the
user's message, with id, right at the beginning.  (The sequencing of
events in the code structure is slightly odd, in that the capabilities
message is formatted after the user's message, but sent beforehand.)

I still think for the reasons above that you probably can't have a
fixed QMP_CAPABILITY_NEGOTIATION_MSGID because we may want to be able
to reuse an old transport connection in the future.

So that means you do need two ids.  (I think you can't defer
formatting the user's message, baking an id into it, because the
lifetime of the callers' json object is too short for you to save it.)

I'm not sure I can see sensible a way of doing this that doesn't have
*three* id variables:
  - a counter for generating new ids
  - the id put in the capabilities command
  - the id put in the user's command

> We could call qmp_ev_prepare_cmd, qmp_ev_prepare_user_cmd.
> 
> > > +    case LIBXL__QMP_MESSAGE_TYPE_RETURN:
> > > +    case LIBXL__QMP_MESSAGE_TYPE_ERROR:
> > ...
> > > +        o = libxl__json_map_get("id", resp, JSON_INTEGER);
> > > +        if (!o) {
> > > +            /*
> > > +             * If "id" isn't present, an error occur on the server before
> > > +             * it has read the "id" provided by libxl.
> > > +             */
> > > +            qmp_ev_parse_error_messages(egc, ev, resp);
> > > +            return ERROR_PROTOCOL_ERROR_QMP;
> > 
> > I think you wanted to return the error code from
> > qmp_ev_parse_error_messages ?
> 
> Yeah, but then I've added to the public documentation that ERROR_QMP_*
> means that ev_qmp is still Connected.

Oh.  Right.  I see.  Now that you have explained I think the code is
correct.  Can you add a comment about this ?  Something like this:
   /*
    * Deliberately squash all errors into ERROR_PROTOCOL_ERROR_QMP.
    * qmp_ev_parse_error_messages will only return that, or
    * ERROR_QMP_*; but ERROR_QMP_* is reserved for errors resulting
    * from the caller's command, and when we return that we promise
    * that the ev_qmp is Connected.
    */

> > * Why are we even doing capability negotation when we throw the answer
> >   away ?
> 
> It is part of the protocol.
> 
> And qmp_capabilities doesn't return anything (just an empty object).

Oh, is the purpose to inform qemu what our capabilities are ?

> > * I think it would be better to check id == ev->id first, rather
> >   than in each of these branches.
> 
> I don't think I follow. id==ev->id is only checked once.

You check id against QMP_CAPABILITY_NEGOTIATION_MSGID separately from
the check against ev->id.  But now that I see you have formatted both
commands and need to have both ids, I'm not sure that comment of mine
makes sense.

> I don't think all ERROR should be protocol error. To me, it is fine if a
> command returns an error, then tell the user about it. Also, there is no
> needs to tear down the connection in this case.

Sorry, yes.

> As for the sequence of checks, I can try to follow it, it might look
> weird because there is two differents ids to check, which id is valide
> depends on the current state.

I think you are probably right about that.


Thanks,
Ian.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.2 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-16 16:09                     ` Ian Jackson
@ 2018-11-16 17:10                       ` Anthony PERARD
  2018-11-16 18:16                         ` Ian Jackson
  0 siblings, 1 reply; 15+ messages in thread
From: Anthony PERARD @ 2018-11-16 17:10 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel, Wei Liu

On Fri, Nov 16, 2018 at 04:09:03PM +0000, Ian Jackson wrote:
> Anthony PERARD writes ("Re: [PATCH v6.2 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> > On Thu, Nov 15, 2018 at 06:40:13PM +0000, Ian Jackson wrote:
> > > It would be better if the id column stated something more useful than
> > > `set'.  `next' maybe, where applicable ?
> > 
> > When 'set', id is the id of the user command we intend to send or have
> > already sent and waiting for the associated reply. I'm not sure which
> > word or set of word would describe enough. I might need to add a comment
> > below the table.
> 
> You could use `sent', `next', alongside your existing `prev' ?

Will do.

> > > > + * - Allowed internal state transition:
> > > > + * disconnected                         -> connecting
> > > > + * connection                           -> capability_negotiation
> > > > + * capability_negotiation/waiting_reply -> connected
> > > > + * connected                            -> waiting_reply
> > > > + * any                                  -> disconnected
> > > 
> > > This does not mention the state `broken' and it should.
> > 
> > Maybe:
> >   any -> broken
> 
> Fine by me.  You want to mention broken -> disconnected too :-).

Will, that part of any->disconnected, but it doesn't hurt to add it.

> > > > +static int qmp_ev_prepare_cmd(libxl__gc *gc,
> > > > +                              libxl__ev_qmp *ev,
> > > > +                              const char *cmd,
> > > > +                              const libxl__json_object *args)
> ...
> > > > +    ev->id++;
> > > > +    buf = qmp_prepare_cmd(gc, cmd, args, ev->id, &len);
> > > > +    if (!buf) {
> > > > +        return ERROR_FAIL;
> > > > +    }
> > > 
> > > There is no doc comment for qmp_prepare_cmd saying whether it logs on
> > > error.  So it doesn't ?  In which case maybe you should do so here.
> > > 
> > > Alternatively if it can only really fail due to memory allocation
> > > failure, maybe it should return void (and call libxl__alloc_failed
> > > on allocation failure) instead ?
> > 
> > So we can have the allocation been done by libxl's functions, as
> > yajl_gen_alloc can use other functions that malloc/free/realloc, that
> > would remove the allocation failure.
> 
> Right.
> 
> > But then I think that qmp_prepare_cmd could fail and return null if
> > 'cmd' or 'args' contains bad json objects, or a wrong string.
> > 
> > So we still needs to detect failure.
> 
> OK.
> 
> > I log here as I can add the domid.
> 
> I'm not sure what you mean.  Where do you log ?  Not here.

Not yet :). There is maybe missing words in my sentence...

> > > > +/* Setup connection */
> > > > +
> > > > +static int qmp_ev_connect(libxl__gc *gc, libxl__ev_qmp *ev)
> > > > +    /* disconnected -> connecting but with `msg` free
> > > > +     * on error: disconnected
> > > > +     * If the initial state isn't disconnected, then nothing is done */
> > > 
> > > Maybe it be better for this function to return `broken' on error ?
> > 
> > That not the case, but I could change to return broken on error, and let
> > the caller clean the state.
> 
> That's what I mean.  That might simplify this function and the caller
> probably has the relevant cleanup path already.  (I haven't checked.)

It does, and yes, that will make the function simpler.

> > > > +    if (revents & POLLOUT) {
> > > > +        rc = qmp_ev_callback_writable(gc, ev, fd);
> > > ...
> > > > +    if (revents & POLLIN) {
> > > > +        rc = qmp_ev_callback_readable(egc, ev, fd);
> > > > +        if (rc)
> > > > +            goto out;
> > > > +
> > > > +        /* parse input */
> > > 
> > > I find it odd that this input parsing is not part of
> > > qmp_ev_callback_readable.  What do you think about moving it there ?
> > 
> > I wanted to reduce the number of function calls between when a user
> > callback is called and when ev_qmp's control passes outsite of ev_qmp's
> > implementation. So I moved qmp_ev_handle_message() call here.
> 
> Why did you want to reduce the number of function calls ?

Well, there is a need to track that `ev` might be discarded, and the
only way left is via a return value. It is probably easier to follow if
less function have the possibility that a user callback have been
called.

> > > > +    /*
> > > > +     * We will send a file descriptor associated with a command on the
> > > > +     * first byte of this command.
> > > > +     */
> > > > +    if (ev->qmp_state == qmp_state_waiting_reply &&
> > > > +        ev->fd >= 0 &&
> > > > +        ev->tx_buf_off == 0) {
> > > 
> > > According to the doc comments, state waiting_reply might have tx_buf
> > > free, in which case it would try to execute this code.  I don't think
> > > that can be right.
> > 
> > When tx_buf is free, that function returns earlier. So this code isn't
> > executed.
> 
> Oh yes, so it does.  But why are we entering this callback with a free
> tx_buf at all ?  If we just return, we are likely to get called again,
> ie an infinite loop.  This can only occur as a result of a bug ?
> In which case maybe it should be an assert ?

Yes, I can probably add the assert.

> > > I wrote:
> > > 
> > >   But I think you should treat only `\n' as the delimiter.  This will
> > >   considerably simplify the buffer handling.  (You should check and trim
> > >   the preceding `\r' before passing things to libxl__json_parse of
> > >   course.)
> > > 
> > > But you don't seem to have done that, or replied ?
> > 
> > This is still simpler than what we had before, and if I look for only
> > '\n', I would need a loop in cases where \n isn't precedded by \r. I
> > also check the whole buffer rather than new data, so looking for \r and
> > check if it is followed by a \n is similar.
> 
> I think a bare \n is not legal and should be treated as a protocol
> error.  Don't you agree ?  Given that, you can search for \n, and if
> it is not preceded by \r, call it an error.

I can't find anything that say bare \n are not legal. \n is part of
rfc7159, which the qmp-spec mentions when speaking about JSON data
structures. It is even possible to ask QEMU to add bare \n, this is done
with pretty=on.

> > > > +    case LIBXL__QMP_MESSAGE_TYPE_QMP:
> > > ...
> > > > +        assert(!ev->tx_buf);
> > > > +        ev->tx_buf = qmp_prepare_cmd(gc, "qmp_capabilities", NULL,
> > > > +                              QMP_CAPABILITY_NEGOTIATION_MSGID,
> > > > +                              &ev->tx_buf_len);
> > > > +        ev->tx_buf_off = 0;
> > > 
> > > Why do you not use qmp_ev_prepare_cmd for this ?
> > 
> > Because qmp_ev_prepare_cmd does more like increasing the id. This
> > qmp_capabilities is a special command that is part of the
> > initialisation. I need to know which id is used so that I can track it
> > later and found out when QEMU is ready to receive other commands.
> 
> But see below about the id.  I think you should be using the normal
> id.
> 
> In another thread you were talking about possibly trying to reuse an
> existing session and putting the pid in the id or something.  In this
> case a fixed id for the capabilities command is not good either.
> 
> Let me bring forward the part where we handle the resuls:
> 
> > > > +        id = libxl__json_object_get_integer(o);
> > > > +
> > > > +        if (id == QMP_CAPABILITY_NEGOTIATION_MSGID) {
> > > > +            /* We have a response to our qmp_capabilities cmd */
> > > > +            if (ev->qmp_state != qmp_state_capability_negotiation ||
> > > > +                type != LIBXL__QMP_MESSAGE_TYPE_RETURN)
> > > > +                goto out_unknown_id;
> > > > +            qmp_ev_set_state(gc, ev, qmp_state_connected);
> > > > +            return 0;
> > > > +        }
> > > 
> > > I'm a bit puzzled about the capability negotation, its id, etc.:
> > > 
> > > * Why does QMP_CAPABILITY_NEGOTIATION_MSGID exist at all ?  You
> > >   could just use a normal id, couldn't you ?  And you'd be able to
> > >   tell from your own state that it was the right value.
> > 
> > But I would then needs to track two different ids, one for the
> > qmp_capabilities command, and one for the user's command.
> 
> But you only construct the id when you prepare the command into
> tx_buf.  So you only have one id at a time: the id of the most
> recently prepared command.  Or am I wrong ?
> 
> ...
> 
> I have looked at this code again and I was wrong.  You format the
> user's message, with id, right at the beginning.  (The sequencing of
> events in the code structure is slightly odd, in that the capabilities
> message is formatted after the user's message, but sent beforehand.)
> 
> I still think for the reasons above that you probably can't have a
> fixed QMP_CAPABILITY_NEGOTIATION_MSGID because we may want to be able
> to reuse an old transport connection in the future.
> 
> So that means you do need two ids.  (I think you can't defer
> formatting the user's message, baking an id into it, because the
> lifetime of the callers' json object is too short for you to save it.)
> 
> I'm not sure I can see sensible a way of doing this that doesn't have
> *three* id variables:
>   - a counter for generating new ids
>   - the id put in the capabilities command
>   - the id put in the user's command

That sounds fine.

> > We could call qmp_ev_prepare_cmd, qmp_ev_prepare_user_cmd.
> > 
> > > > +    case LIBXL__QMP_MESSAGE_TYPE_RETURN:
> > > > +    case LIBXL__QMP_MESSAGE_TYPE_ERROR:
> > > ...
> > > > +        o = libxl__json_map_get("id", resp, JSON_INTEGER);
> > > > +        if (!o) {
> > > > +            /*
> > > > +             * If "id" isn't present, an error occur on the server before
> > > > +             * it has read the "id" provided by libxl.
> > > > +             */
> > > > +            qmp_ev_parse_error_messages(egc, ev, resp);
> > > > +            return ERROR_PROTOCOL_ERROR_QMP;
> > > 
> > > I think you wanted to return the error code from
> > > qmp_ev_parse_error_messages ?
> > 
> > Yeah, but then I've added to the public documentation that ERROR_QMP_*
> > means that ev_qmp is still Connected.
> 
> Oh.  Right.  I see.  Now that you have explained I think the code is
> correct.  Can you add a comment about this ?  Something like this:
>    /*
>     * Deliberately squash all errors into ERROR_PROTOCOL_ERROR_QMP.
>     * qmp_ev_parse_error_messages will only return that, or
>     * ERROR_QMP_*; but ERROR_QMP_* is reserved for errors resulting
>     * from the caller's command, and when we return that we promise
>     * that the ev_qmp is Connected.
>     */

Will do.

> > > * Why are we even doing capability negotation when we throw the answer
> > >   away ?
> > 
> > It is part of the protocol.
> > 
> > And qmp_capabilities doesn't return anything (just an empty object).
> 
> Oh, is the purpose to inform qemu what our capabilities are ?

It actually inform qemu of the capabilities we want. The QMP server will
not accept any other commands before the client execute qmp_capabilities.

The conversation between QEMU and libxl goes like this:

QEMU: Hi, I'm QEMU 3.0, I can do "oob".
libxl: Hi, I don't need any capabilities.
QEMU: Thanks, you can now run any other command you like.
libxl sends the user command.

Or in term of edited JSON:
QEMU:  { "QMP": {"version": XXX, "capabilities": ["oob"] } }
libxl: { "execute": "qmp_capabilities", "arguments": {} }
QEMU:  { "return": {}}

And here is a link to the example in the QMP spec document
(3. QMP Examples):
https://git.qemu.org/?p=qemu.git;a=blob;f=docs/interop/qmp-spec.txt;h=8f7da0245d51447be7df2b3d4b105bad9fbec0b3;hb=HEAD#l244

Maybe I should add somewhere in a comment where to find the QMP spec,
even so we already have "This file implement a client for QMP (QEMU
Monitor Protocol). For the Specification, see in the QEMU repository."
at the top of libxl_qmp.c


Thanks,

-- 
Anthony PERARD

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v6.2 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*
  2018-11-16 17:10                       ` Anthony PERARD
@ 2018-11-16 18:16                         ` Ian Jackson
  0 siblings, 0 replies; 15+ messages in thread
From: Ian Jackson @ 2018-11-16 18:16 UTC (permalink / raw)
  To: Anthony PERARD; +Cc: xen-devel, Wei Liu

Anthony PERARD writes ("Re: [PATCH v6.2 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> On Fri, Nov 16, 2018 at 04:09:03PM +0000, Ian Jackson wrote:
> > Anthony PERARD writes ("Re: [PATCH v6.2 05/11] libxl_qmp: Implementation of libxl__ev_qmp_*"):
> > > I wanted to reduce the number of function calls between when a user
> > > callback is called and when ev_qmp's control passes outsite of ev_qmp's
> > > implementation. So I moved qmp_ev_handle_message() call here.
> > 
> > Why did you want to reduce the number of function calls ?
> 
> Well, there is a need to track that `ev` might be discarded, and the
> only way left is via a return value. It is probably easier to follow if
> less function have the possibility that a user callback have been
> called.

Oh, I see.  Hrm.  OK, I think I would have done it the other way, but
that makes sense.

> > I think a bare \n is not legal and should be treated as a protocol
> > error.  Don't you agree ?  Given that, you can search for \n, and if
> > it is not preceded by \r, call it an error.
> 
> I can't find anything that say bare \n are not legal. \n is part of
> rfc7159, which the qmp-spec mentions when speaking about JSON data
> structures. It is even possible to ask QEMU to add bare \n, this is done
> with pretty=on.

I think I was wrong.  So thanks for explaining and you should leave
this code the way it is.

> > I'm not sure I can see sensible a way of doing this that doesn't have
> > *three* id variables:
> >   - a counter for generating new ids
> >   - the id put in the capabilities command
> >   - the id put in the user's command
> 
> That sounds fine.

If you have another approach to suggest please go ahead.

> > Oh, is the purpose to inform qemu what our capabilities are ?
> 
> It actually inform qemu of the capabilities we want. The QMP server will
> not accept any other commands before the client execute qmp_capabilities.
> 
> The conversation between QEMU and libxl goes like this:
> 
> QEMU: Hi, I'm QEMU 3.0, I can do "oob".
> libxl: Hi, I don't need any capabilities.
> QEMU: Thanks, you can now run any other command you like.
> libxl sends the user command.
> 
> Or in term of edited JSON:
> QEMU:  { "QMP": {"version": XXX, "capabilities": ["oob"] } }
> libxl: { "execute": "qmp_capabilities", "arguments": {} }
> QEMU:  { "return": {}}

Thanks for the explanation.

> And here is a link to the example in the QMP spec document
> (3. QMP Examples):
> https://git.qemu.org/?p=qemu.git;a=blob;f=docs/interop/qmp-spec.txt;h=8f7da0245d51447be7df2b3d4b105bad9fbec0b3;hb=HEAD#l244
> 
> Maybe I should add somewhere in a comment where to find the QMP spec,
> even so we already have "This file implement a client for QMP (QEMU
> Monitor Protocol). For the Specification, see in the QEMU repository."
> at the top of libxl_qmp.c

Sorry for pontificating without having read the spec myself ...  but
yes a link would be nice.

Thanks,
Ian.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2018-11-16 18:16 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-13 12:28 [PATCH v6.1 05/11] libxl_qmp: Implementation of libxl__ev_qmp_* Anthony PERARD
2018-11-13 13:14 ` Ian Jackson
2018-11-13 15:42   ` Anthony PERARD
2018-11-13 16:10     ` Ian Jackson
2018-11-14 11:40       ` Anthony PERARD
2018-11-14 15:43         ` Ian Jackson
2018-11-14 15:49           ` Ian Jackson
2018-11-14 16:33           ` Anthony PERARD
2018-11-14 17:07             ` Ian Jackson
2018-11-15 11:15               ` [PATCH v6.2 " Anthony PERARD
2018-11-15 18:40                 ` Ian Jackson
2018-11-16 14:53                   ` Anthony PERARD
2018-11-16 16:09                     ` Ian Jackson
2018-11-16 17:10                       ` Anthony PERARD
2018-11-16 18:16                         ` Ian Jackson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.