All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PULL 05/47] contrib: add libvhost-user
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
@ 2016-11-10 16:12 ` Michael S. Tsirkin
  2016-11-10 17:05   ` Daniel P. Berrange
  2016-11-10 16:12 ` [Qemu-devel] [PULL 01/47] tests/vhost-user-bridge: remove false comment Michael S. Tsirkin
                   ` (46 subsequent siblings)
  47 siblings, 1 reply; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:12 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Marc-André Lureau, Felipe Franciosi,
	Paolo Bonzini, Stefan Hajnoczi, Eric Blake, Markus Armbruster,
	Daniel P. Berrange

From: Marc-André Lureau <marcandre.lureau@redhat.com>

Add a library to help implementing vhost-user backend (or slave).

Dealing with vhost-user as an application developer isn't so easy: you
have all the trouble with any protocol: validation, unix ancillary data,
shared memory, eventfd, logging, and on top of that you need to deal
with virtio queues, if possible efficiently.

qemu test has a nice vhost-user testing application vhost-user-bridge,
which implements most of vhost-user, and virtio.c which implements
virtqueues manipulation. Based on these two, I tried to make a simple
library, reusable for tests or development of new vhost-user scenarios.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
[Felipe: set used_idx copy on SET_VRING_ADDR and update shadow avail idx
 on SET_VRING_BASE]
Signed-off-by: Felipe Franciosi <felipe@nutanix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Makefile                              |    1 +
 Makefile.objs                         |    2 +-
 contrib/libvhost-user/libvhost-user.h |  435 ++++++++++
 contrib/libvhost-user/libvhost-user.c | 1499 +++++++++++++++++++++++++++++++++
 contrib/libvhost-user/Makefile.objs   |    1 +
 5 files changed, 1937 insertions(+), 1 deletion(-)
 create mode 100644 contrib/libvhost-user/libvhost-user.h
 create mode 100644 contrib/libvhost-user/libvhost-user.c
 create mode 100644 contrib/libvhost-user/Makefile.objs

diff --git a/Makefile b/Makefile
index 474cc5e..68accb9 100644
--- a/Makefile
+++ b/Makefile
@@ -149,6 +149,7 @@ dummy := $(call unnest-vars,, \
                 qga-obj-y \
                 ivshmem-client-obj-y \
                 ivshmem-server-obj-y \
+                libvhost-user-obj-y \
                 qga-vss-dll-obj-y \
                 block-obj-y \
                 block-obj-m \
diff --git a/Makefile.objs b/Makefile.objs
index 06f74b8..ecd6576 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -116,7 +116,7 @@ qga-vss-dll-obj-y = qga/
 # contrib
 ivshmem-client-obj-y = contrib/ivshmem-client/
 ivshmem-server-obj-y = contrib/ivshmem-server/
-
+libvhost-user-obj-y = contrib/libvhost-user/
 
 ######################################################################
 trace-events-y = trace-events
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
new file mode 100644
index 0000000..156b50e
--- /dev/null
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -0,0 +1,435 @@
+/*
+ * Vhost User library
+ *
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *  Victor Kaplansky <victork@redhat.com>
+ *  Marc-André Lureau <mlureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef LIBVHOST_USER_H
+#define LIBVHOST_USER_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/vhost.h>
+#include "standard-headers/linux/virtio_ring.h"
+
+/* Based on qemu/hw/virtio/vhost-user.c */
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#define VHOST_LOG_PAGE 4096
+
+#define VHOST_MAX_NR_VIRTQUEUE 8
+#define VIRTQUEUE_MAX_SIZE 1024
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+enum VhostUserProtocolFeature {
+    VHOST_USER_PROTOCOL_F_MQ = 0,
+    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
+    VHOST_USER_PROTOCOL_F_RARP = 2,
+
+    VHOST_USER_PROTOCOL_F_MAX
+};
+
+#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
+
+typedef enum VhostUserRequest {
+    VHOST_USER_NONE = 0,
+    VHOST_USER_GET_FEATURES = 1,
+    VHOST_USER_SET_FEATURES = 2,
+    VHOST_USER_SET_OWNER = 3,
+    VHOST_USER_RESET_OWNER = 4,
+    VHOST_USER_SET_MEM_TABLE = 5,
+    VHOST_USER_SET_LOG_BASE = 6,
+    VHOST_USER_SET_LOG_FD = 7,
+    VHOST_USER_SET_VRING_NUM = 8,
+    VHOST_USER_SET_VRING_ADDR = 9,
+    VHOST_USER_SET_VRING_BASE = 10,
+    VHOST_USER_GET_VRING_BASE = 11,
+    VHOST_USER_SET_VRING_KICK = 12,
+    VHOST_USER_SET_VRING_CALL = 13,
+    VHOST_USER_SET_VRING_ERR = 14,
+    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+    VHOST_USER_GET_QUEUE_NUM = 17,
+    VHOST_USER_SET_VRING_ENABLE = 18,
+    VHOST_USER_SEND_RARP = 19,
+    VHOST_USER_INPUT_GET_CONFIG = 20,
+    VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+    uint64_t guest_phys_addr;
+    uint64_t memory_size;
+    uint64_t userspace_addr;
+    uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+    uint32_t nregions;
+    uint32_t padding;
+    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+    uint64_t mmap_size;
+    uint64_t mmap_offset;
+} VhostUserLog;
+
+#if defined(_WIN32)
+# define VU_PACKED __attribute__((gcc_struct, packed))
+#else
+# define VU_PACKED __attribute__((packed))
+#endif
+
+typedef struct VhostUserMsg {
+    VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK     (0x3)
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+
+    union {
+#define VHOST_USER_VRING_IDX_MASK   (0xff)
+#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
+        uint64_t u64;
+        struct vhost_vring_state state;
+        struct vhost_vring_addr addr;
+        VhostUserMemory memory;
+        VhostUserLog log;
+    } payload;
+
+    int fds[VHOST_MEMORY_MAX_NREGIONS];
+    int fd_num;
+    uint8_t *data;
+} VU_PACKED VhostUserMsg;
+
+typedef struct VuDevRegion {
+    /* Guest Physical address. */
+    uint64_t gpa;
+    /* Memory region size. */
+    uint64_t size;
+    /* QEMU virtual address (userspace). */
+    uint64_t qva;
+    /* Starting offset in our mmaped space. */
+    uint64_t mmap_offset;
+    /* Start address of mmaped space. */
+    uint64_t mmap_addr;
+} VuDevRegion;
+
+typedef struct VuDev VuDev;
+
+typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
+typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
+typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
+                                  int *do_reply);
+typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
+
+typedef struct VuDevIface {
+    /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
+    vu_get_features_cb get_features;
+    /* enable vhost implementation features */
+    vu_set_features_cb set_features;
+    /* get the protocol feature bitmask from the underlying vhost
+     * implementation */
+    vu_get_features_cb get_protocol_features;
+    /* enable protocol features in the underlying vhost implementation. */
+    vu_set_features_cb set_protocol_features;
+    /* process_msg is called for each vhost-user message received */
+    /* skip libvhost-user processing if return value != 0 */
+    vu_process_msg_cb process_msg;
+    /* tells when queues can be processed */
+    vu_queue_set_started_cb queue_set_started;
+} VuDevIface;
+
+typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
+
+typedef struct VuRing {
+    unsigned int num;
+    struct vring_desc *desc;
+    struct vring_avail *avail;
+    struct vring_used *used;
+    uint64_t log_guest_addr;
+    uint32_t flags;
+} VuRing;
+
+typedef struct VuVirtq {
+    VuRing vring;
+
+    /* Next head to pop */
+    uint16_t last_avail_idx;
+
+    /* Last avail_idx read from VQ. */
+    uint16_t shadow_avail_idx;
+
+    uint16_t used_idx;
+
+    /* Last used index value we have signalled on */
+    uint16_t signalled_used;
+
+    /* Last used index value we have signalled on */
+    bool signalled_used_valid;
+
+    /* Notification enabled? */
+    bool notification;
+
+    int inuse;
+
+    vu_queue_handler_cb handler;
+
+    int call_fd;
+    int kick_fd;
+    int err_fd;
+    unsigned int enable;
+    bool started;
+} VuVirtq;
+
+enum VuWatchCondtion {
+    VU_WATCH_IN = 1 << 0,
+    VU_WATCH_OUT = 1 << 1,
+    VU_WATCH_PRI = 1 << 2,
+    VU_WATCH_ERR = 1 << 3,
+    VU_WATCH_HUP = 1 << 4,
+};
+
+typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
+typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
+typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
+                                 vu_watch_cb cb, void *data);
+typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
+
+struct VuDev {
+    int sock;
+    uint32_t nregions;
+    VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+    VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE];
+    int log_call_fd;
+    uint64_t log_size;
+    uint8_t *log_table;
+    uint64_t features;
+    uint64_t protocol_features;
+    bool broken;
+
+    /* @set_watch: add or update the given fd to the watch set,
+     * call cb when condition is met */
+    vu_set_watch_cb set_watch;
+
+    /* @remove_watch: remove the given fd from the watch set */
+    vu_remove_watch_cb remove_watch;
+
+    /* @panic: encountered an unrecoverable error, you may try to
+     * re-initialize */
+    vu_panic_cb panic;
+    const VuDevIface *iface;
+};
+
+typedef struct VuVirtqElement {
+    unsigned int index;
+    unsigned int out_num;
+    unsigned int in_num;
+    struct iovec *in_sg;
+    struct iovec *out_sg;
+} VuVirtqElement;
+
+/**
+ * vu_init:
+ * @dev: a VuDev context
+ * @socket: the socket connected to vhost-user master
+ * @panic: a panic callback
+ * @set_watch: a set_watch callback
+ * @remove_watch: a remove_watch callback
+ * @iface: a VuDevIface structure with vhost-user device callbacks
+ *
+ * Intializes a VuDev vhost-user context.
+ **/
+void vu_init(VuDev *dev,
+             int socket,
+             vu_panic_cb panic,
+             vu_set_watch_cb set_watch,
+             vu_remove_watch_cb remove_watch,
+             const VuDevIface *iface);
+
+
+/**
+ * vu_deinit:
+ * @dev: a VuDev context
+ *
+ * Cleans up the VuDev context
+ */
+void vu_deinit(VuDev *dev);
+
+/**
+ * vu_dispatch:
+ * @dev: a VuDev context
+ *
+ * Process one vhost-user message.
+ *
+ * Returns: TRUE on success, FALSE on failure.
+ */
+bool vu_dispatch(VuDev *dev);
+
+/**
+ * vu_gpa_to_va:
+ * @dev: a VuDev context
+ * @guest_addr: guest address
+ *
+ * Translate a guest address to a pointer. Returns NULL on failure.
+ */
+void *vu_gpa_to_va(VuDev *dev, uint64_t guest_addr);
+
+/**
+ * vu_get_queue:
+ * @dev: a VuDev context
+ * @qidx: queue index
+ *
+ * Returns the queue number @qidx.
+ */
+VuVirtq *vu_get_queue(VuDev *dev, int qidx);
+
+/**
+ * vu_set_queue_handler:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @handler: the queue handler callback
+ *
+ * Set the queue handler. This function may be called several times
+ * for the same queue. If called with NULL @handler, the handler is
+ * removed.
+ */
+void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
+                          vu_queue_handler_cb handler);
+
+
+/**
+ * vu_queue_set_notification:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @enable: state
+ *
+ * Set whether the queue notifies (via event index or interrupt)
+ */
+void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
+
+/**
+ * vu_queue_enabled:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Returns: whether the queue is enabled.
+ */
+bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_enabled:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Returns: whether the queue is empty.
+ */
+int vu_queue_empty(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_notify:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Request to notify the queue via callfd (skipped if unnecessary)
+ */
+void vu_queue_notify(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_pop:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @sz: the size of struct to return (must be >= VuVirtqElement)
+ *
+ * Returns: a VuVirtqElement filled from the queue or NULL.
+ */
+void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
+
+/**
+ * vu_queue_rewind:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @num: number of elements to push back
+ *
+ * Pretend that elements weren't popped from the virtqueue.  The next
+ * virtqueue_pop() will refetch the oldest element.
+ *
+ * Returns: true on success, false if @num is greater than the number of in use
+ * elements.
+ */
+bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
+
+/**
+ * vu_queue_fill:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @elem: a VuVirtqElement
+ * @len: length in bytes to write
+ * @idx: optional offset for the used ring index (0 in general)
+ *
+ * Fill the used ring with @elem element.
+ */
+void vu_queue_fill(VuDev *dev, VuVirtq *vq,
+                   const VuVirtqElement *elem,
+                   unsigned int len, unsigned int idx);
+
+/**
+ * vu_queue_push:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @elem: a VuVirtqElement
+ * @len: length in bytes to write
+ *
+ * Helper that combines vu_queue_fill() with a vu_queue_flush().
+ */
+void vu_queue_push(VuDev *dev, VuVirtq *vq,
+                   const VuVirtqElement *elem, unsigned int len);
+
+/**
+ * vu_queue_flush:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @num: number of elements to flush
+ *
+ * Mark the last number of elements as done (used.idx is updated by
+ * num elements).
+*/
+void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
+
+/**
+ * vu_queue_get_avail_bytes:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @in_bytes: in bytes
+ * @out_bytes: out bytes
+ * @max_in_bytes: stop counting after max_in_bytes
+ * @max_out_bytes: stop counting after max_out_bytes
+ *
+ * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
+ */
+void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
+                              unsigned int *out_bytes,
+                              unsigned max_in_bytes, unsigned max_out_bytes);
+
+/**
+ * vu_queue_avail_bytes:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @in_bytes: expected in bytes
+ * @out_bytes: expected out bytes
+ *
+ * Returns: true if in_bytes <= in_total && out_bytes <= out_total
+ */
+bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
+                          unsigned int out_bytes);
+
+#endif /* LIBVHOST_USER_H */
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
new file mode 100644
index 0000000..af4faad
--- /dev/null
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -0,0 +1,1499 @@
+/*
+ * Vhost User library
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *  Anthony Liguori <aliguori@us.ibm.com>
+ *  Marc-André Lureau <mlureau@redhat.com>
+ *  Victor Kaplansky <victork@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include <qemu/osdep.h>
+#include <sys/eventfd.h>
+#include <linux/vhost.h>
+
+#include "qemu/atomic.h"
+
+#include "libvhost-user.h"
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION 1
+#define LIBVHOST_USER_DEBUG 0
+
+#define DPRINT(...)                             \
+    do {                                        \
+        if (LIBVHOST_USER_DEBUG) {              \
+            fprintf(stderr, __VA_ARGS__);        \
+        }                                       \
+    } while (0)
+
+static const char *
+vu_request_to_string(int req)
+{
+#define REQ(req) [req] = #req
+    static const char *vu_request_str[] = {
+        REQ(VHOST_USER_NONE),
+        REQ(VHOST_USER_GET_FEATURES),
+        REQ(VHOST_USER_SET_FEATURES),
+        REQ(VHOST_USER_NONE),
+        REQ(VHOST_USER_GET_FEATURES),
+        REQ(VHOST_USER_SET_FEATURES),
+        REQ(VHOST_USER_SET_OWNER),
+        REQ(VHOST_USER_RESET_OWNER),
+        REQ(VHOST_USER_SET_MEM_TABLE),
+        REQ(VHOST_USER_SET_LOG_BASE),
+        REQ(VHOST_USER_SET_LOG_FD),
+        REQ(VHOST_USER_SET_VRING_NUM),
+        REQ(VHOST_USER_SET_VRING_ADDR),
+        REQ(VHOST_USER_SET_VRING_BASE),
+        REQ(VHOST_USER_GET_VRING_BASE),
+        REQ(VHOST_USER_SET_VRING_KICK),
+        REQ(VHOST_USER_SET_VRING_CALL),
+        REQ(VHOST_USER_SET_VRING_ERR),
+        REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
+        REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
+        REQ(VHOST_USER_GET_QUEUE_NUM),
+        REQ(VHOST_USER_SET_VRING_ENABLE),
+        REQ(VHOST_USER_SEND_RARP),
+        REQ(VHOST_USER_INPUT_GET_CONFIG),
+        REQ(VHOST_USER_MAX),
+    };
+#undef REQ
+
+    if (req < VHOST_USER_MAX) {
+        return vu_request_str[req];
+    } else {
+        return "unknown";
+    }
+}
+
+static void
+vu_panic(VuDev *dev, const char *msg, ...)
+{
+    char *buf = NULL;
+    va_list ap;
+
+    va_start(ap, msg);
+    (void)vasprintf(&buf, msg, ap);
+    va_end(ap);
+
+    dev->broken = true;
+    dev->panic(dev, buf);
+    free(buf);
+
+    /* FIXME: find a way to call virtio_error? */
+}
+
+/* Translate guest physical address to our virtual address.  */
+void *
+vu_gpa_to_va(VuDev *dev, uint64_t guest_addr)
+{
+    int i;
+
+    /* Find matching memory region.  */
+    for (i = 0; i < dev->nregions; i++) {
+        VuDevRegion *r = &dev->regions[i];
+
+        if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
+            return (void *)(uintptr_t)
+                guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
+        }
+    }
+
+    return NULL;
+}
+
+/* Translate qemu virtual address to our virtual address.  */
+static void *
+qva_to_va(VuDev *dev, uint64_t qemu_addr)
+{
+    int i;
+
+    /* Find matching memory region.  */
+    for (i = 0; i < dev->nregions; i++) {
+        VuDevRegion *r = &dev->regions[i];
+
+        if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
+            return (void *)(uintptr_t)
+                qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
+        }
+    }
+
+    return NULL;
+}
+
+static void
+vmsg_close_fds(VhostUserMsg *vmsg)
+{
+    int i;
+
+    for (i = 0; i < vmsg->fd_num; i++) {
+        close(vmsg->fds[i]);
+    }
+}
+
+static bool
+vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
+{
+    char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
+    struct iovec iov = {
+        .iov_base = (char *)vmsg,
+        .iov_len = VHOST_USER_HDR_SIZE,
+    };
+    struct msghdr msg = {
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+        .msg_control = control,
+        .msg_controllen = sizeof(control),
+    };
+    size_t fd_size;
+    struct cmsghdr *cmsg;
+    int rc;
+
+    do {
+        rc = recvmsg(conn_fd, &msg, 0);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+    if (rc <= 0) {
+        vu_panic(dev, "Error while recvmsg: %s", strerror(errno));
+        return false;
+    }
+
+    vmsg->fd_num = 0;
+    for (cmsg = CMSG_FIRSTHDR(&msg);
+         cmsg != NULL;
+         cmsg = CMSG_NXTHDR(&msg, cmsg))
+    {
+        if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+            fd_size = cmsg->cmsg_len - CMSG_LEN(0);
+            vmsg->fd_num = fd_size / sizeof(int);
+            memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
+            break;
+        }
+    }
+
+    if (vmsg->size > sizeof(vmsg->payload)) {
+        vu_panic(dev,
+                 "Error: too big message request: %d, size: vmsg->size: %u, "
+                 "while sizeof(vmsg->payload) = %zu\n",
+                 vmsg->request, vmsg->size, sizeof(vmsg->payload));
+        goto fail;
+    }
+
+    if (vmsg->size) {
+        do {
+            rc = read(conn_fd, &vmsg->payload, vmsg->size);
+        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+        if (rc <= 0) {
+            vu_panic(dev, "Error while reading: %s", strerror(errno));
+            goto fail;
+        }
+
+        assert(rc == vmsg->size);
+    }
+
+    return true;
+
+fail:
+    vmsg_close_fds(vmsg);
+
+    return false;
+}
+
+static bool
+vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
+{
+    int rc;
+    uint8_t *p = (uint8_t *)vmsg;
+
+    /* Set the version in the flags when sending the reply */
+    vmsg->flags &= ~VHOST_USER_VERSION_MASK;
+    vmsg->flags |= VHOST_USER_VERSION;
+    vmsg->flags |= VHOST_USER_REPLY_MASK;
+
+    do {
+        rc = write(conn_fd, p, VHOST_USER_HDR_SIZE);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+    do {
+        if (vmsg->data) {
+            rc = write(conn_fd, vmsg->data, vmsg->size);
+        } else {
+            rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
+        }
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+    if (rc <= 0) {
+        vu_panic(dev, "Error while writing: %s", strerror(errno));
+        return false;
+    }
+
+    return true;
+}
+
+/* Kick the log_call_fd if required. */
+static void
+vu_log_kick(VuDev *dev)
+{
+    if (dev->log_call_fd != -1) {
+        DPRINT("Kicking the QEMU's log...\n");
+        if (eventfd_write(dev->log_call_fd, 1) < 0) {
+            vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
+        }
+    }
+}
+
+static void
+vu_log_page(uint8_t *log_table, uint64_t page)
+{
+    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+    atomic_or(&log_table[page / 8], 1 << (page % 8));
+}
+
+static void
+vu_log_write(VuDev *dev, uint64_t address, uint64_t length)
+{
+    uint64_t page;
+
+    if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
+        !dev->log_table || !length) {
+        return;
+    }
+
+    assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
+
+    page = address / VHOST_LOG_PAGE;
+    while (page * VHOST_LOG_PAGE < address + length) {
+        vu_log_page(dev->log_table, page);
+        page += VHOST_LOG_PAGE;
+    }
+
+    vu_log_kick(dev);
+}
+
+static void
+vu_kick_cb(VuDev *dev, int condition, void *data)
+{
+    int index = (intptr_t)data;
+    VuVirtq *vq = &dev->vq[index];
+    int sock = vq->kick_fd;
+    eventfd_t kick_data;
+    ssize_t rc;
+
+    rc = eventfd_read(sock, &kick_data);
+    if (rc == -1) {
+        vu_panic(dev, "kick eventfd_read(): %s", strerror(errno));
+        dev->remove_watch(dev, dev->vq[index].kick_fd);
+    } else {
+        DPRINT("Got kick_data: %016"PRIx64" handler:%p idx:%d\n",
+               kick_data, vq->handler, index);
+        if (vq->handler) {
+            vq->handler(dev, index);
+        }
+    }
+}
+
+static bool
+vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    vmsg->payload.u64 =
+        1ULL << VHOST_F_LOG_ALL |
+        1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
+
+    if (dev->iface->get_features) {
+        vmsg->payload.u64 |= dev->iface->get_features(dev);
+    }
+
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    return true;
+}
+
+static void
+vu_set_enable_all_rings(VuDev *dev, bool enabled)
+{
+    int i;
+
+    for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
+        dev->vq[i].enable = enabled;
+    }
+}
+
+static bool
+vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    dev->features = vmsg->payload.u64;
+
+    if (!(dev->features & VHOST_USER_F_PROTOCOL_FEATURES)) {
+        vu_set_enable_all_rings(dev, true);
+    }
+
+    if (dev->iface->set_features) {
+        dev->iface->set_features(dev, dev->features);
+    }
+
+    return false;
+}
+
+static bool
+vu_set_owner_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    return false;
+}
+
+static void
+vu_close_log(VuDev *dev)
+{
+    if (dev->log_table) {
+        if (munmap(dev->log_table, dev->log_size) != 0) {
+            perror("close log munmap() error");
+        }
+
+        dev->log_table = NULL;
+    }
+    if (dev->log_call_fd != -1) {
+        close(dev->log_call_fd);
+        dev->log_call_fd = -1;
+    }
+}
+
+static bool
+vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    vu_set_enable_all_rings(dev, false);
+
+    return false;
+}
+
+static bool
+vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int i;
+    VhostUserMemory *memory = &vmsg->payload.memory;
+    dev->nregions = memory->nregions;
+
+    DPRINT("Nregions: %d\n", memory->nregions);
+    for (i = 0; i < dev->nregions; i++) {
+        void *mmap_addr;
+        VhostUserMemoryRegion *msg_region = &memory->regions[i];
+        VuDevRegion *dev_region = &dev->regions[i];
+
+        DPRINT("Region %d\n", i);
+        DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
+               msg_region->guest_phys_addr);
+        DPRINT("    memory_size:     0x%016"PRIx64"\n",
+               msg_region->memory_size);
+        DPRINT("    userspace_addr   0x%016"PRIx64"\n",
+               msg_region->userspace_addr);
+        DPRINT("    mmap_offset      0x%016"PRIx64"\n",
+               msg_region->mmap_offset);
+
+        dev_region->gpa = msg_region->guest_phys_addr;
+        dev_region->size = msg_region->memory_size;
+        dev_region->qva = msg_region->userspace_addr;
+        dev_region->mmap_offset = msg_region->mmap_offset;
+
+        /* We don't use offset argument of mmap() since the
+         * mapped address has to be page aligned, and we use huge
+         * pages.  */
+        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
+                         PROT_READ | PROT_WRITE, MAP_SHARED,
+                         vmsg->fds[i], 0);
+
+        if (mmap_addr == MAP_FAILED) {
+            vu_panic(dev, "region mmap error: %s", strerror(errno));
+        } else {
+            dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
+            DPRINT("    mmap_addr:       0x%016"PRIx64"\n",
+                   dev_region->mmap_addr);
+        }
+
+        close(vmsg->fds[i]);
+    }
+
+    return false;
+}
+
+static bool
+vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int fd;
+    uint64_t log_mmap_size, log_mmap_offset;
+    void *rc;
+
+    if (vmsg->fd_num != 1 ||
+        vmsg->size != sizeof(vmsg->payload.log)) {
+        vu_panic(dev, "Invalid log_base message");
+        return true;
+    }
+
+    fd = vmsg->fds[0];
+    log_mmap_offset = vmsg->payload.log.mmap_offset;
+    log_mmap_size = vmsg->payload.log.mmap_size;
+    DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
+    DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);
+
+    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+              log_mmap_offset);
+    if (rc == MAP_FAILED) {
+        perror("log mmap error");
+    }
+    dev->log_table = rc;
+    dev->log_size = log_mmap_size;
+
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    return true;
+}
+
+static bool
+vu_set_log_fd_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    if (vmsg->fd_num != 1) {
+        vu_panic(dev, "Invalid log_fd message");
+        return false;
+    }
+
+    if (dev->log_call_fd != -1) {
+        close(dev->log_call_fd);
+    }
+    dev->log_call_fd = vmsg->fds[0];
+    DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
+
+    return false;
+}
+
+static bool
+vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    unsigned int index = vmsg->payload.state.index;
+    unsigned int num = vmsg->payload.state.num;
+
+    DPRINT("State.index: %d\n", index);
+    DPRINT("State.num:   %d\n", num);
+    dev->vq[index].vring.num = num;
+
+    return false;
+}
+
+static bool
+vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    struct vhost_vring_addr *vra = &vmsg->payload.addr;
+    unsigned int index = vra->index;
+    VuVirtq *vq = &dev->vq[index];
+
+    DPRINT("vhost_vring_addr:\n");
+    DPRINT("    index:  %d\n", vra->index);
+    DPRINT("    flags:  %d\n", vra->flags);
+    DPRINT("    desc_user_addr:   0x%016llx\n", vra->desc_user_addr);
+    DPRINT("    used_user_addr:   0x%016llx\n", vra->used_user_addr);
+    DPRINT("    avail_user_addr:  0x%016llx\n", vra->avail_user_addr);
+    DPRINT("    log_guest_addr:   0x%016llx\n", vra->log_guest_addr);
+
+    vq->vring.flags = vra->flags;
+    vq->vring.desc = qva_to_va(dev, vra->desc_user_addr);
+    vq->vring.used = qva_to_va(dev, vra->used_user_addr);
+    vq->vring.avail = qva_to_va(dev, vra->avail_user_addr);
+    vq->vring.log_guest_addr = vra->log_guest_addr;
+
+    DPRINT("Setting virtq addresses:\n");
+    DPRINT("    vring_desc  at %p\n", vq->vring.desc);
+    DPRINT("    vring_used  at %p\n", vq->vring.used);
+    DPRINT("    vring_avail at %p\n", vq->vring.avail);
+
+    if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) {
+        vu_panic(dev, "Invalid vring_addr message");
+        return false;
+    }
+
+    vq->used_idx = vq->vring.used->idx;
+
+    return false;
+}
+
+static bool
+vu_set_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    unsigned int index = vmsg->payload.state.index;
+    unsigned int num = vmsg->payload.state.num;
+
+    DPRINT("State.index: %d\n", index);
+    DPRINT("State.num:   %d\n", num);
+    dev->vq[index].shadow_avail_idx = dev->vq[index].last_avail_idx = num;
+
+    return false;
+}
+
+static bool
+vu_get_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    unsigned int index = vmsg->payload.state.index;
+
+    DPRINT("State.index: %d\n", index);
+    vmsg->payload.state.num = dev->vq[index].last_avail_idx;
+    vmsg->size = sizeof(vmsg->payload.state);
+
+    dev->vq[index].started = false;
+    if (dev->iface->queue_set_started) {
+        dev->iface->queue_set_started(dev, index, false);
+    }
+
+    if (dev->vq[index].call_fd != -1) {
+        close(dev->vq[index].call_fd);
+        dev->vq[index].call_fd = -1;
+    }
+    if (dev->vq[index].kick_fd != -1) {
+        dev->remove_watch(dev, dev->vq[index].kick_fd);
+        close(dev->vq[index].kick_fd);
+        dev->vq[index].kick_fd = -1;
+    }
+
+    return true;
+}
+
+static bool
+vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+    if (index >= VHOST_MAX_NR_VIRTQUEUE) {
+        vmsg_close_fds(vmsg);
+        vu_panic(dev, "Invalid queue index: %u", index);
+        return false;
+    }
+
+    if (vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK ||
+        vmsg->fd_num != 1) {
+        vmsg_close_fds(vmsg);
+        vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
+        return false;
+    }
+
+    return true;
+}
+
+static bool
+vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    if (!vu_check_queue_msg_file(dev, vmsg)) {
+        return false;
+    }
+
+    if (dev->vq[index].kick_fd != -1) {
+        dev->remove_watch(dev, dev->vq[index].kick_fd);
+        close(dev->vq[index].kick_fd);
+        dev->vq[index].kick_fd = -1;
+    }
+
+    if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+        dev->vq[index].kick_fd = vmsg->fds[0];
+        DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
+    }
+
+    dev->vq[index].started = true;
+    if (dev->iface->queue_set_started) {
+        dev->iface->queue_set_started(dev, index, true);
+    }
+
+    if (dev->vq[index].kick_fd != -1 && dev->vq[index].handler) {
+        dev->set_watch(dev, dev->vq[index].kick_fd, VU_WATCH_IN,
+                       vu_kick_cb, (void *)(long)index);
+
+        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
+               dev->vq[index].kick_fd, index);
+    }
+
+    return false;
+}
+
+void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
+                          vu_queue_handler_cb handler)
+{
+    int qidx = vq - dev->vq;
+
+    vq->handler = handler;
+    if (vq->kick_fd >= 0) {
+        if (handler) {
+            dev->set_watch(dev, vq->kick_fd, VU_WATCH_IN,
+                           vu_kick_cb, (void *)(long)qidx);
+        } else {
+            dev->remove_watch(dev, vq->kick_fd);
+        }
+    }
+}
+
+static bool
+vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    if (!vu_check_queue_msg_file(dev, vmsg)) {
+        return false;
+    }
+
+    if (dev->vq[index].call_fd != -1) {
+        close(dev->vq[index].call_fd);
+        dev->vq[index].call_fd = -1;
+    }
+
+    if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+        dev->vq[index].call_fd = vmsg->fds[0];
+    }
+
+    DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
+
+    return false;
+}
+
+static bool
+vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    if (!vu_check_queue_msg_file(dev, vmsg)) {
+        return false;
+    }
+
+    if (dev->vq[index].err_fd != -1) {
+        close(dev->vq[index].err_fd);
+        dev->vq[index].err_fd = -1;
+    }
+
+    if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+        dev->vq[index].err_fd = vmsg->fds[0];
+    }
+
+    return false;
+}
+
+static bool
+vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
+
+    if (dev->iface->get_protocol_features) {
+        features |= dev->iface->get_protocol_features(dev);
+    }
+
+    vmsg->payload.u64 = features;
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    return true;
+}
+
+static bool
+vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    uint64_t features = vmsg->payload.u64;
+
+    DPRINT("u64: 0x%016"PRIx64"\n", features);
+
+    dev->protocol_features = vmsg->payload.u64;
+
+    if (dev->iface->set_protocol_features) {
+        dev->iface->set_protocol_features(dev, features);
+    }
+
+    return false;
+}
+
+static bool
+vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return false;
+}
+
+static bool
+vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    unsigned int index = vmsg->payload.state.index;
+    unsigned int enable = vmsg->payload.state.num;
+
+    DPRINT("State.index: %d\n", index);
+    DPRINT("State.enable:   %d\n", enable);
+
+    if (index >= VHOST_MAX_NR_VIRTQUEUE) {
+        vu_panic(dev, "Invalid vring_enable index: %u", index);
+        return false;
+    }
+
+    dev->vq[index].enable = enable;
+    return false;
+}
+
+static bool
+vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int do_reply = 0;
+
+    /* Print out generic part of the request. */
+    DPRINT("================ Vhost user message ================\n");
+    DPRINT("Request: %s (%d)\n", vu_request_to_string(vmsg->request),
+           vmsg->request);
+    DPRINT("Flags:   0x%x\n", vmsg->flags);
+    DPRINT("Size:    %d\n", vmsg->size);
+
+    if (vmsg->fd_num) {
+        int i;
+        DPRINT("Fds:");
+        for (i = 0; i < vmsg->fd_num; i++) {
+            DPRINT(" %d", vmsg->fds[i]);
+        }
+        DPRINT("\n");
+    }
+
+    if (dev->iface->process_msg &&
+        dev->iface->process_msg(dev, vmsg, &do_reply)) {
+        return do_reply;
+    }
+
+    switch (vmsg->request) {
+    case VHOST_USER_GET_FEATURES:
+        return vu_get_features_exec(dev, vmsg);
+    case VHOST_USER_SET_FEATURES:
+        return vu_set_features_exec(dev, vmsg);
+    case VHOST_USER_GET_PROTOCOL_FEATURES:
+        return vu_get_protocol_features_exec(dev, vmsg);
+    case VHOST_USER_SET_PROTOCOL_FEATURES:
+        return vu_set_protocol_features_exec(dev, vmsg);
+    case VHOST_USER_SET_OWNER:
+        return vu_set_owner_exec(dev, vmsg);
+    case VHOST_USER_RESET_OWNER:
+        return vu_reset_device_exec(dev, vmsg);
+    case VHOST_USER_SET_MEM_TABLE:
+        return vu_set_mem_table_exec(dev, vmsg);
+    case VHOST_USER_SET_LOG_BASE:
+        return vu_set_log_base_exec(dev, vmsg);
+    case VHOST_USER_SET_LOG_FD:
+        return vu_set_log_fd_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_NUM:
+        return vu_set_vring_num_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_ADDR:
+        return vu_set_vring_addr_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_BASE:
+        return vu_set_vring_base_exec(dev, vmsg);
+    case VHOST_USER_GET_VRING_BASE:
+        return vu_get_vring_base_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_KICK:
+        return vu_set_vring_kick_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_CALL:
+        return vu_set_vring_call_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_ERR:
+        return vu_set_vring_err_exec(dev, vmsg);
+    case VHOST_USER_GET_QUEUE_NUM:
+        return vu_get_queue_num_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_ENABLE:
+        return vu_set_vring_enable_exec(dev, vmsg);
+    default:
+        vmsg_close_fds(vmsg);
+        vu_panic(dev, "Unhandled request: %d", vmsg->request);
+    }
+
+    return false;
+}
+
+bool
+vu_dispatch(VuDev *dev)
+{
+    VhostUserMsg vmsg = { 0, };
+    int reply_requested;
+    bool success = false;
+
+    if (!vu_message_read(dev, dev->sock, &vmsg)) {
+        goto end;
+    }
+
+    reply_requested = vu_process_message(dev, &vmsg);
+    if (!reply_requested) {
+        success = true;
+        goto end;
+    }
+
+    if (!vu_message_write(dev, dev->sock, &vmsg)) {
+        goto end;
+    }
+
+    success = true;
+
+end:
+    g_free(vmsg.data);
+    return success;
+}
+
+void
+vu_deinit(VuDev *dev)
+{
+    int i;
+
+    for (i = 0; i < dev->nregions; i++) {
+        VuDevRegion *r = &dev->regions[i];
+        void *m = (void *) (uintptr_t) r->mmap_addr;
+        if (m != MAP_FAILED) {
+            munmap(m, r->size + r->mmap_offset);
+        }
+    }
+    dev->nregions = 0;
+
+    for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
+        VuVirtq *vq = &dev->vq[i];
+
+        if (vq->call_fd != -1) {
+            close(vq->call_fd);
+            vq->call_fd = -1;
+        }
+
+        if (vq->kick_fd != -1) {
+            close(vq->kick_fd);
+            vq->kick_fd = -1;
+        }
+
+        if (vq->err_fd != -1) {
+            close(vq->err_fd);
+            vq->err_fd = -1;
+        }
+    }
+
+
+    vu_close_log(dev);
+
+    if (dev->sock != -1) {
+        close(dev->sock);
+    }
+}
+
+void
+vu_init(VuDev *dev,
+        int socket,
+        vu_panic_cb panic,
+        vu_set_watch_cb set_watch,
+        vu_remove_watch_cb remove_watch,
+        const VuDevIface *iface)
+{
+    int i;
+
+    assert(socket >= 0);
+    assert(set_watch);
+    assert(remove_watch);
+    assert(iface);
+    assert(panic);
+
+    memset(dev, 0, sizeof(*dev));
+
+    dev->sock = socket;
+    dev->panic = panic;
+    dev->set_watch = set_watch;
+    dev->remove_watch = remove_watch;
+    dev->iface = iface;
+    dev->log_call_fd = -1;
+    for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
+        dev->vq[i] = (VuVirtq) {
+            .call_fd = -1, .kick_fd = -1, .err_fd = -1,
+            .notification = true,
+        };
+    }
+}
+
+VuVirtq *
+vu_get_queue(VuDev *dev, int qidx)
+{
+    assert(qidx < VHOST_MAX_NR_VIRTQUEUE);
+    return &dev->vq[qidx];
+}
+
+bool
+vu_queue_enabled(VuDev *dev, VuVirtq *vq)
+{
+    return vq->enable;
+}
+
+static inline uint16_t
+vring_avail_flags(VuVirtq *vq)
+{
+    return vq->vring.avail->flags;
+}
+
+static inline uint16_t
+vring_avail_idx(VuVirtq *vq)
+{
+    vq->shadow_avail_idx = vq->vring.avail->idx;
+
+    return vq->shadow_avail_idx;
+}
+
+static inline uint16_t
+vring_avail_ring(VuVirtq *vq, int i)
+{
+    return vq->vring.avail->ring[i];
+}
+
+static inline uint16_t
+vring_get_used_event(VuVirtq *vq)
+{
+    return vring_avail_ring(vq, vq->vring.num);
+}
+
+static int
+virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
+{
+    uint16_t num_heads = vring_avail_idx(vq) - idx;
+
+    /* Check it isn't doing very strange things with descriptor numbers. */
+    if (num_heads > vq->vring.num) {
+        vu_panic(dev, "Guest moved used index from %u to %u",
+                 idx, vq->shadow_avail_idx);
+        return -1;
+    }
+    if (num_heads) {
+        /* On success, callers read a descriptor at vq->last_avail_idx.
+         * Make sure descriptor read does not bypass avail index read. */
+        smp_rmb();
+    }
+
+    return num_heads;
+}
+
+static bool
+virtqueue_get_head(VuDev *dev, VuVirtq *vq,
+                   unsigned int idx, unsigned int *head)
+{
+    /* Grab the next descriptor number they're advertising, and increment
+     * the index we've seen. */
+    *head = vring_avail_ring(vq, idx % vq->vring.num);
+
+    /* If their number is silly, that's a fatal mistake. */
+    if (*head >= vq->vring.num) {
+        vu_panic(dev, "Guest says index %u is available", head);
+        return false;
+    }
+
+    return true;
+}
+
+enum {
+    VIRTQUEUE_READ_DESC_ERROR = -1,
+    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
+    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
+};
+
+static int
+virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
+                         int i, unsigned int max, unsigned int *next)
+{
+    /* If this descriptor says it doesn't chain, we're done. */
+    if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
+        return VIRTQUEUE_READ_DESC_DONE;
+    }
+
+    /* Check they're not leading us off end of descriptors. */
+    *next = desc[i].next;
+    /* Make sure compiler knows to grab that: we don't want it changing! */
+    smp_wmb();
+
+    if (*next >= max) {
+        vu_panic(dev, "Desc next is %u", next);
+        return VIRTQUEUE_READ_DESC_ERROR;
+    }
+
+    return VIRTQUEUE_READ_DESC_MORE;
+}
+
+void
+vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
+                         unsigned int *out_bytes,
+                         unsigned max_in_bytes, unsigned max_out_bytes)
+{
+    unsigned int idx;
+    unsigned int total_bufs, in_total, out_total;
+    int rc;
+
+    idx = vq->last_avail_idx;
+
+    total_bufs = in_total = out_total = 0;
+    while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
+        unsigned int max, num_bufs, indirect = 0;
+        struct vring_desc *desc;
+        unsigned int i;
+
+        max = vq->vring.num;
+        num_bufs = total_bufs;
+        if (!virtqueue_get_head(dev, vq, idx++, &i)) {
+            goto err;
+        }
+        desc = vq->vring.desc;
+
+        if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+            if (desc[i].len % sizeof(struct vring_desc)) {
+                vu_panic(dev, "Invalid size for indirect buffer table");
+                goto err;
+            }
+
+            /* If we've got too many, that implies a descriptor loop. */
+            if (num_bufs >= max) {
+                vu_panic(dev, "Looped descriptor");
+                goto err;
+            }
+
+            /* loop over the indirect descriptor table */
+            indirect = 1;
+            max = desc[i].len / sizeof(struct vring_desc);
+            desc = vu_gpa_to_va(dev, desc[i].addr);
+            num_bufs = i = 0;
+        }
+
+        do {
+            /* If we've got too many, that implies a descriptor loop. */
+            if (++num_bufs > max) {
+                vu_panic(dev, "Looped descriptor");
+                goto err;
+            }
+
+            if (desc[i].flags & VRING_DESC_F_WRITE) {
+                in_total += desc[i].len;
+            } else {
+                out_total += desc[i].len;
+            }
+            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+                goto done;
+            }
+            rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
+        } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+            goto err;
+        }
+
+        if (!indirect) {
+            total_bufs = num_bufs;
+        } else {
+            total_bufs++;
+        }
+    }
+    if (rc < 0) {
+        goto err;
+    }
+done:
+    if (in_bytes) {
+        *in_bytes = in_total;
+    }
+    if (out_bytes) {
+        *out_bytes = out_total;
+    }
+    return;
+
+err:
+    in_total = out_total = 0;
+    goto done;
+}
+
+bool
+vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
+                     unsigned int out_bytes)
+{
+    unsigned int in_total, out_total;
+
+    vu_queue_get_avail_bytes(dev, vq, &in_total, &out_total,
+                             in_bytes, out_bytes);
+
+    return in_bytes <= in_total && out_bytes <= out_total;
+}
+
+/* Fetch avail_idx from VQ memory only when we really need to know if
+ * guest has added some buffers. */
+int
+vu_queue_empty(VuDev *dev, VuVirtq *vq)
+{
+    if (vq->shadow_avail_idx != vq->last_avail_idx) {
+        return 0;
+    }
+
+    return vring_avail_idx(vq) == vq->last_avail_idx;
+}
+
+static inline
+bool has_feature(uint64_t features, unsigned int fbit)
+{
+    assert(fbit < 64);
+    return !!(features & (1ULL << fbit));
+}
+
+static inline
+bool vu_has_feature(VuDev *dev,
+                    unsigned int fbit)
+{
+    return has_feature(dev->features, fbit);
+}
+
+static bool
+vring_notify(VuDev *dev, VuVirtq *vq)
+{
+    uint16_t old, new;
+    bool v;
+
+    /* We need to expose used array entries before checking used event. */
+    smp_mb();
+
+    /* Always notify when queue is empty (when feature acknowledge) */
+    if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+        !vq->inuse && vu_queue_empty(dev, vq)) {
+        return true;
+    }
+
+    if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
+    }
+
+    v = vq->signalled_used_valid;
+    vq->signalled_used_valid = true;
+    old = vq->signalled_used;
+    new = vq->signalled_used = vq->used_idx;
+    return !v || vring_need_event(vring_get_used_event(vq), new, old);
+}
+
+void
+vu_queue_notify(VuDev *dev, VuVirtq *vq)
+{
+    if (unlikely(dev->broken)) {
+        return;
+    }
+
+    if (!vring_notify(dev, vq)) {
+        DPRINT("skipped notify...\n");
+        return;
+    }
+
+    if (eventfd_write(vq->call_fd, 1) < 0) {
+        vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
+    }
+}
+
+static inline void
+vring_used_flags_set_bit(VuVirtq *vq, int mask)
+{
+    uint16_t *flags;
+
+    flags = (uint16_t *)((char*)vq->vring.used +
+                         offsetof(struct vring_used, flags));
+    *flags |= mask;
+}
+
+static inline void
+vring_used_flags_unset_bit(VuVirtq *vq, int mask)
+{
+    uint16_t *flags;
+
+    flags = (uint16_t *)((char*)vq->vring.used +
+                         offsetof(struct vring_used, flags));
+    *flags &= ~mask;
+}
+
+static inline void
+vring_set_avail_event(VuVirtq *vq, uint16_t val)
+{
+    if (!vq->notification) {
+        return;
+    }
+
+    *((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val;
+}
+
+void
+vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
+{
+    vq->notification = enable;
+    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+        vring_set_avail_event(vq, vring_avail_idx(vq));
+    } else if (enable) {
+        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
+    } else {
+        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
+    }
+    if (enable) {
+        /* Expose avail event/used flags before caller checks the avail idx. */
+        smp_mb();
+    }
+}
+
+static void
+virtqueue_map_desc(VuDev *dev,
+                   unsigned int *p_num_sg, struct iovec *iov,
+                   unsigned int max_num_sg, bool is_write,
+                   uint64_t pa, size_t sz)
+{
+    unsigned num_sg = *p_num_sg;
+
+    assert(num_sg <= max_num_sg);
+
+    if (!sz) {
+        vu_panic(dev, "virtio: zero sized buffers are not allowed");
+        return;
+    }
+
+    iov[num_sg].iov_base = vu_gpa_to_va(dev, pa);
+    iov[num_sg].iov_len = sz;
+    num_sg++;
+
+    *p_num_sg = num_sg;
+}
+
+/* Round number down to multiple */
+#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
+
+/* Round number up to multiple */
+#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
+
+static void *
+virtqueue_alloc_element(size_t sz,
+                                     unsigned out_num, unsigned in_num)
+{
+    VuVirtqElement *elem;
+    size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
+    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
+    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
+
+    assert(sz >= sizeof(VuVirtqElement));
+    elem = malloc(out_sg_end);
+    elem->out_num = out_num;
+    elem->in_num = in_num;
+    elem->in_sg = (void *)elem + in_sg_ofs;
+    elem->out_sg = (void *)elem + out_sg_ofs;
+    return elem;
+}
+
+void *
+vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
+{
+    unsigned int i, head, max;
+    VuVirtqElement *elem;
+    unsigned out_num, in_num;
+    struct iovec iov[VIRTQUEUE_MAX_SIZE];
+    struct vring_desc *desc;
+    int rc;
+
+    if (unlikely(dev->broken)) {
+        return NULL;
+    }
+
+    if (vu_queue_empty(dev, vq)) {
+        return NULL;
+    }
+    /* Needed after virtio_queue_empty(), see comment in
+     * virtqueue_num_heads(). */
+    smp_rmb();
+
+    /* When we start there are none of either input nor output. */
+    out_num = in_num = 0;
+
+    max = vq->vring.num;
+    if (vq->inuse >= vq->vring.num) {
+        vu_panic(dev, "Virtqueue size exceeded");
+        return NULL;
+    }
+
+    if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
+        return NULL;
+    }
+
+    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+        vring_set_avail_event(vq, vq->last_avail_idx);
+    }
+
+    i = head;
+    desc = vq->vring.desc;
+    if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+        if (desc[i].len % sizeof(struct vring_desc)) {
+            vu_panic(dev, "Invalid size for indirect buffer table");
+        }
+
+        /* loop over the indirect descriptor table */
+        max = desc[i].len / sizeof(struct vring_desc);
+        desc = vu_gpa_to_va(dev, desc[i].addr);
+        i = 0;
+    }
+
+    /* Collect all the descriptors */
+    do {
+        if (desc[i].flags & VRING_DESC_F_WRITE) {
+            virtqueue_map_desc(dev, &in_num, iov + out_num,
+                               VIRTQUEUE_MAX_SIZE - out_num, true,
+                               desc[i].addr, desc[i].len);
+        } else {
+            if (in_num) {
+                vu_panic(dev, "Incorrect order for descriptors");
+                return NULL;
+            }
+            virtqueue_map_desc(dev, &out_num, iov,
+                               VIRTQUEUE_MAX_SIZE, false,
+                               desc[i].addr, desc[i].len);
+        }
+
+        /* If we've got too many, that implies a descriptor loop. */
+        if ((in_num + out_num) > max) {
+            vu_panic(dev, "Looped descriptor");
+        }
+        rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
+    } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+        return NULL;
+    }
+
+    /* Now copy what we have collected and mapped */
+    elem = virtqueue_alloc_element(sz, out_num, in_num);
+    elem->index = head;
+    for (i = 0; i < out_num; i++) {
+        elem->out_sg[i] = iov[i];
+    }
+    for (i = 0; i < in_num; i++) {
+        elem->in_sg[i] = iov[out_num + i];
+    }
+
+    vq->inuse++;
+
+    return elem;
+}
+
+bool
+vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num)
+{
+    if (num > vq->inuse) {
+        return false;
+    }
+    vq->last_avail_idx -= num;
+    vq->inuse -= num;
+    return true;
+}
+
+static inline
+void vring_used_write(VuDev *dev, VuVirtq *vq,
+                      struct vring_used_elem *uelem, int i)
+{
+    struct vring_used *used = vq->vring.used;
+
+    used->ring[i] = *uelem;
+    vu_log_write(dev, vq->vring.log_guest_addr +
+                 offsetof(struct vring_used, ring[i]),
+                 sizeof(used->ring[i]));
+}
+
+
+static void
+vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
+                  const VuVirtqElement *elem,
+                  unsigned int len)
+{
+    struct vring_desc *desc = vq->vring.desc;
+    unsigned int i, max, min;
+    unsigned num_bufs = 0;
+
+    max = vq->vring.num;
+    i = elem->index;
+
+    if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+        if (desc[i].len % sizeof(struct vring_desc)) {
+            vu_panic(dev, "Invalid size for indirect buffer table");
+        }
+
+        /* loop over the indirect descriptor table */
+        max = desc[i].len / sizeof(struct vring_desc);
+        desc = vu_gpa_to_va(dev, desc[i].addr);
+        i = 0;
+    }
+
+    do {
+        if (++num_bufs > max) {
+            vu_panic(dev, "Looped descriptor");
+            return;
+        }
+
+        if (desc[i].flags & VRING_DESC_F_WRITE) {
+            min = MIN(desc[i].len, len);
+            vu_log_write(dev, desc[i].addr, min);
+            len -= min;
+        }
+
+    } while (len > 0 &&
+             (virtqueue_read_next_desc(dev, desc, i, max, &i)
+              == VIRTQUEUE_READ_DESC_MORE));
+}
+
+void
+vu_queue_fill(VuDev *dev, VuVirtq *vq,
+              const VuVirtqElement *elem,
+              unsigned int len, unsigned int idx)
+{
+    struct vring_used_elem uelem;
+
+    if (unlikely(dev->broken)) {
+        return;
+    }
+
+    vu_log_queue_fill(dev, vq, elem, len);
+
+    idx = (idx + vq->used_idx) % vq->vring.num;
+
+    uelem.id = elem->index;
+    uelem.len = len;
+    vring_used_write(dev, vq, &uelem, idx);
+}
+
+static inline
+void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
+{
+    vq->vring.used->idx = val;
+    vu_log_write(dev,
+                 vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
+                 sizeof(vq->vring.used->idx));
+
+    vq->used_idx = val;
+}
+
+void
+vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count)
+{
+    uint16_t old, new;
+
+    if (unlikely(dev->broken)) {
+        return;
+    }
+
+    /* Make sure buffer is written before we update index. */
+    smp_wmb();
+
+    old = vq->used_idx;
+    new = old + count;
+    vring_used_idx_set(dev, vq, new);
+    vq->inuse -= count;
+    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
+        vq->signalled_used_valid = false;
+    }
+}
+
+void
+vu_queue_push(VuDev *dev, VuVirtq *vq,
+              const VuVirtqElement *elem, unsigned int len)
+{
+    vu_queue_fill(dev, vq, elem, len, 0);
+    vu_queue_flush(dev, vq, 1);
+}
diff --git a/contrib/libvhost-user/Makefile.objs b/contrib/libvhost-user/Makefile.objs
new file mode 100644
index 0000000..cef1ad6
--- /dev/null
+++ b/contrib/libvhost-user/Makefile.objs
@@ -0,0 +1 @@
+libvhost-user-obj-y = libvhost-user.o
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
@ 2016-11-10 16:12 Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 05/47] contrib: add libvhost-user Michael S. Tsirkin
                   ` (47 more replies)
  0 siblings, 48 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:12 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Stefan Hajnoczi

The following changes since commit 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6:

  MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 +0000)

are available in the git repository at:

  git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17:

  docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 +0200)

----------------------------------------------------------------
virtio, vhost, pc, pci: tests, documentation, fixes and cleanups

Lots of fixes all over the place.  I allowed some cleanups in even though they
are not strictly bugfixes, they might prevent bugs and seem very safe.

Most importantly, this fixes a regression with vhost introduced
by the last pull.

libvhost-user is the only thing that might be controvertial here, but it's only
affecting contrib/ and tests so I think it's still fair game, and several of
people were asking for it.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

----------------------------------------------------------------
Cao jin (11):
      pcie_aer: Convert pcie_aer_init to Error
      msix: Follow CODING_STYLE
      hcd-xhci: check & correct param before using it
      pci: Convert msix_init() to Error and fix callers to check it
      megasas: change behaviour of msix switch
      hcd-xhci: change behaviour of msix switch
      megasas: remove unnecessary megasas_use_msix()
      megasas: undo the overwrites of msi user configuration
      vmxnet3: fix reference leak issue
      vmxnet3: remove unnecessary internal msix flag
      msi_init: convert assert to return -errno

Felipe Franciosi (2):
      vhost: Update 'ioeventfd_started' with host notifiers
      vhost: Use vbus var instead of VIRTIO_BUS() macro

Gonglei (1):
      virtio-crypto: tag as not hotpluggable and migration

Greg Kurz (3):
      vhost: adapt vhost_verify_ring_mappings() to virtio 1 ring layout
      vhost: drop legacy vring layout bits
      virtio: drop virtio_queue_get_ring_{size,addr}()

Jason Wang (1):
      intel_iommu: fixing source id during IOTLB hash key calculation

Ladi Prosek (2):
      virtio: rename virtqueue_discard to virtqueue_unpop
      virtio: make virtqueue_alloc_element static

Marc-André Lureau (6):
      tests/vhost-user-bridge: remove false comment
      tests/vhost-user-bridge: remove unnecessary dispatcher_remove
      tests/vhost-user-bridge: indicate peer disconnected
      tests/vhost-user-bridge: do not accept more than one connection
      contrib: add libvhost-user
      tests/vhost-user-bridge: use contrib/libvhost-user

Marcel Apfelbaum (1):
      docs: add PCIe devices placement guidelines

Michael S. Tsirkin (2):
      virtio: allow per-device-class legacy features
      virtio-net: mark VIRTIO_NET_F_GSO as legacy

Peter Xu (3):
      intel_iommu: fix several incorrect endianess and bit fields
      intel_iommu: fix incorrect assert
      acpi: fix DMAR device scope for IOAPIC

Rafael David Tinoco (1):
      vhost: migration blocker only if shared log is used

Xiao Guangrong (14):
      qdev: hotplug: drop HotplugHandler.post_plug callback
      nvdimm acpi: drop the lock of fit buffer
      pc: memhp: move nvdimm hotplug out of memory hotplug
      pc: memhp: stop handling nvdimm hotplug in pc_dimm_unplug
      nvdimm acpi: clean up nvdimm_build_acpi
      docs: improve the doc of Read FIT method
      nvdimm acpi: rename nvdimm_plugged_device_list
      nvdimm acpi: cleanup nvdimm_build_fit
      nvdimm acpi: rename nvdimm_acpi_hotplug
      nvdimm acpi: define DSM return codes
      nvdimm acpi: fix two comments
      nvdimm acpi: rename nvdimm_dsm_reserved_root
      nvdimm acpi: use aml_name_decl to define named object
      nvdimm acpi: introduce NVDIMM_DSM_MEMORY_SIZE

 Makefile                                |    1 +
 Makefile.objs                           |    2 +-
 contrib/libvhost-user/libvhost-user.h   |  435 +++++++++
 hw/i386/intel_iommu_internal.h          |    2 +-
 include/hw/acpi/acpi-defs.h             |    5 +-
 include/hw/hotplug.h                    |   11 -
 include/hw/i386/intel_iommu.h           |    9 +-
 include/hw/mem/nvdimm.h                 |   20 +-
 include/hw/pci/msix.h                   |    5 +-
 include/hw/pci/pcie_aer.h               |    3 +-
 include/hw/virtio/vhost.h               |    7 +-
 include/hw/virtio/virtio.h              |   12 +-
 contrib/libvhost-user/libvhost-user.c   | 1499 +++++++++++++++++++++++++++++++
 hw/acpi/ich9.c                          |    8 +-
 hw/acpi/memory_hotplug.c                |   31 +-
 hw/acpi/nvdimm.c                        |  166 ++--
 hw/acpi/piix4.c                         |    7 +-
 hw/block/nvme.c                         |    5 +-
 hw/core/hotplug.c                       |   11 -
 hw/core/qdev.c                          |   20 +-
 hw/i386/acpi-build.c                    |    3 +-
 hw/i386/intel_iommu.c                   |    6 +-
 hw/i386/pc.c                            |   29 +-
 hw/misc/ivshmem.c                       |    8 +-
 hw/net/e1000e.c                         |    8 +-
 hw/net/rocker/rocker.c                  |    7 +-
 hw/net/virtio-net.c                     |    3 +-
 hw/net/vmxnet3.c                        |   46 +-
 hw/pci-bridge/ioh3420.c                 |    3 +-
 hw/pci-bridge/xio3130_downstream.c      |    3 +-
 hw/pci-bridge/xio3130_upstream.c        |    3 +-
 hw/pci/msi.c                            |    9 +-
 hw/pci/msix.c                           |   42 +-
 hw/pci/pcie_aer.c                       |   17 +-
 hw/s390x/virtio-ccw.c                   |    4 +-
 hw/scsi/megasas.c                       |   49 +-
 hw/usb/hcd-xhci.c                       |   71 +-
 hw/vfio/pci.c                           |    8 +-
 hw/virtio/vhost.c                       |  106 ++-
 hw/virtio/virtio-balloon.c              |    2 +-
 hw/virtio/virtio-crypto-pci.c           |    2 +-
 hw/virtio/virtio-crypto.c               |    1 +
 hw/virtio/virtio-pci.c                  |   15 +-
 hw/virtio/virtio.c                      |   23 +-
 tests/vhost-user-bridge.c               | 1183 +++++-------------------
 contrib/libvhost-user/Makefile.objs     |    1 +
 default-configs/mips-softmmu-common.mak |    1 +
 docs/pcie.txt                           |  310 +++++++
 docs/specs/acpi_mem_hotplug.txt         |    3 -
 docs/specs/acpi_nvdimm.txt              |   99 +-
 tests/Makefile.include                  |    2 +-
 51 files changed, 2940 insertions(+), 1386 deletions(-)
 create mode 100644 contrib/libvhost-user/libvhost-user.h
 create mode 100644 contrib/libvhost-user/libvhost-user.c
 create mode 100644 contrib/libvhost-user/Makefile.objs
 create mode 100644 docs/pcie.txt

^ permalink raw reply	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 01/47] tests/vhost-user-bridge: remove false comment
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 05/47] contrib: add libvhost-user Michael S. Tsirkin
@ 2016-11-10 16:12 ` Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 02/47] tests/vhost-user-bridge: remove unnecessary dispatcher_remove Michael S. Tsirkin
                   ` (45 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:12 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Marc-André Lureau, Victor Kaplansky, Yuanhan Liu

From: Marc-André Lureau <marcandre.lureau@redhat.com>

dispatcher_remove() is in use.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tests/vhost-user-bridge.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 775e031..e91279b 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -101,8 +101,6 @@ dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
     return 0;
 }
 
-/* dispatcher_remove() is not currently in use but may be useful
- * in the future. */
 static int
 dispatcher_remove(Dispatcher *dispr, int sock)
 {
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 02/47] tests/vhost-user-bridge: remove unnecessary dispatcher_remove
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 05/47] contrib: add libvhost-user Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 01/47] tests/vhost-user-bridge: remove false comment Michael S. Tsirkin
@ 2016-11-10 16:12 ` Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 03/47] tests/vhost-user-bridge: indicate peer disconnected Michael S. Tsirkin
                   ` (44 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:12 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Marc-André Lureau, Victor Kaplansky,
	Yuanhan Liu, Eric Blake

From: Marc-André Lureau <marcandre.lureau@redhat.com>

The call fd is not watched

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tests/vhost-user-bridge.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index e91279b..19b0e94 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -979,7 +979,6 @@ vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
 
     if (dev->vq[index].call_fd != -1) {
         close(dev->vq[index].call_fd);
-        dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
         dev->vq[index].call_fd = -1;
     }
     if (dev->vq[index].kick_fd != -1) {
@@ -1043,7 +1042,6 @@ vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
 
     if (dev->vq[index].call_fd != -1) {
         close(dev->vq[index].call_fd);
-        dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
     }
     dev->vq[index].call_fd = vmsg->fds[0];
     DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 03/47] tests/vhost-user-bridge: indicate peer disconnected
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (2 preceding siblings ...)
  2016-11-10 16:12 ` [Qemu-devel] [PULL 02/47] tests/vhost-user-bridge: remove unnecessary dispatcher_remove Michael S. Tsirkin
@ 2016-11-10 16:12 ` Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 04/47] tests/vhost-user-bridge: do not accept more than one connection Michael S. Tsirkin
                   ` (43 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:12 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Marc-André Lureau, Victor Kaplansky, Yuanhan Liu

From: Marc-André Lureau <marcandre.lureau@redhat.com>

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tests/vhost-user-bridge.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 19b0e94..97e45d8 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -385,7 +385,6 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
     rc = recvmsg(conn_fd, &msg, 0);
 
     if (rc == 0) {
-        vubr_die("recvmsg");
         fprintf(stderr, "Peer disconnected.\n");
         exit(1);
     }
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 04/47] tests/vhost-user-bridge: do not accept more than one connection
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (3 preceding siblings ...)
  2016-11-10 16:12 ` [Qemu-devel] [PULL 03/47] tests/vhost-user-bridge: indicate peer disconnected Michael S. Tsirkin
@ 2016-11-10 16:12 ` Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 06/47] tests/vhost-user-bridge: use contrib/libvhost-user Michael S. Tsirkin
                   ` (42 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:12 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Marc-André Lureau, Victor Kaplansky, Yuanhan Liu

From: Marc-André Lureau <marcandre.lureau@redhat.com>

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tests/vhost-user-bridge.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 97e45d8..5b618f6 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -1200,6 +1200,7 @@ vubr_accept_cb(int sock, void *ctx)
     }
     DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
     dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
+    dispatcher_remove(&dev->dispatcher, sock);
 }
 
 static VubrDev *
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 06/47] tests/vhost-user-bridge: use contrib/libvhost-user
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (4 preceding siblings ...)
  2016-11-10 16:12 ` [Qemu-devel] [PULL 04/47] tests/vhost-user-bridge: do not accept more than one connection Michael S. Tsirkin
@ 2016-11-10 16:12 ` Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 07/47] intel_iommu: fixing source id during IOTLB hash key calculation Michael S. Tsirkin
                   ` (41 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:12 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Marc-André Lureau, Eric Blake,
	Daniel P. Berrange, Markus Armbruster, Victor Kaplansky,
	Yuanhan Liu

From: Marc-André Lureau <marcandre.lureau@redhat.com>

Use the libvhost-user library.

This ended up being a rather large patch that cannot be easily splitted,
due to massive code move and API changes.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tests/vhost-user-bridge.c | 1177 +++++++++------------------------------------
 tests/Makefile.include    |    2 +-
 2 files changed, 229 insertions(+), 950 deletions(-)

diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 5b618f6..8618c20 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -30,17 +30,9 @@
 #define _FILE_OFFSET_BITS 64
 
 #include "qemu/osdep.h"
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/unistd.h>
-#include <sys/eventfd.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <linux/vhost.h>
-
-#include "qemu/atomic.h"
+#include "qemu/iov.h"
 #include "standard-headers/linux/virtio_net.h"
-#include "standard-headers/linux/virtio_ring.h"
+#include "contrib/libvhost-user/libvhost-user.h"
 
 #define VHOST_USER_BRIDGE_DEBUG 1
 
@@ -64,6 +56,17 @@ typedef struct Dispatcher {
     Event events[FD_SETSIZE];
 } Dispatcher;
 
+typedef struct VubrDev {
+    VuDev vudev;
+    Dispatcher dispatcher;
+    int backend_udp_sock;
+    struct sockaddr_in backend_udp_dest;
+    int hdrlen;
+    int sock;
+    int ready;
+    int quit;
+} VubrDev;
+
 static void
 vubr_die(const char *s)
 {
@@ -155,1036 +158,313 @@ dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
     return 0;
 }
 
-typedef struct VubrVirtq {
-    int call_fd;
-    int kick_fd;
-    uint32_t size;
-    uint16_t last_avail_index;
-    uint16_t last_used_index;
-    struct vring_desc *desc;
-    struct vring_avail *avail;
-    struct vring_used *used;
-    uint64_t log_guest_addr;
-    int enable;
-} VubrVirtq;
-
-/* Based on qemu/hw/virtio/vhost-user.c */
-
-#define VHOST_MEMORY_MAX_NREGIONS    8
-#define VHOST_USER_F_PROTOCOL_FEATURES 30
-/* v1.0 compliant. */
-#define VIRTIO_F_VERSION_1		32
-
-#define VHOST_LOG_PAGE 4096
-
-enum VhostUserProtocolFeature {
-    VHOST_USER_PROTOCOL_F_MQ = 0,
-    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
-    VHOST_USER_PROTOCOL_F_RARP = 2,
-
-    VHOST_USER_PROTOCOL_F_MAX
-};
-
-#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
-
-typedef enum VhostUserRequest {
-    VHOST_USER_NONE = 0,
-    VHOST_USER_GET_FEATURES = 1,
-    VHOST_USER_SET_FEATURES = 2,
-    VHOST_USER_SET_OWNER = 3,
-    VHOST_USER_RESET_OWNER = 4,
-    VHOST_USER_SET_MEM_TABLE = 5,
-    VHOST_USER_SET_LOG_BASE = 6,
-    VHOST_USER_SET_LOG_FD = 7,
-    VHOST_USER_SET_VRING_NUM = 8,
-    VHOST_USER_SET_VRING_ADDR = 9,
-    VHOST_USER_SET_VRING_BASE = 10,
-    VHOST_USER_GET_VRING_BASE = 11,
-    VHOST_USER_SET_VRING_KICK = 12,
-    VHOST_USER_SET_VRING_CALL = 13,
-    VHOST_USER_SET_VRING_ERR = 14,
-    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
-    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
-    VHOST_USER_GET_QUEUE_NUM = 17,
-    VHOST_USER_SET_VRING_ENABLE = 18,
-    VHOST_USER_SEND_RARP = 19,
-    VHOST_USER_MAX
-} VhostUserRequest;
-
-typedef struct VhostUserMemoryRegion {
-    uint64_t guest_phys_addr;
-    uint64_t memory_size;
-    uint64_t userspace_addr;
-    uint64_t mmap_offset;
-} VhostUserMemoryRegion;
-
-typedef struct VhostUserMemory {
-    uint32_t nregions;
-    uint32_t padding;
-    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
-} VhostUserMemory;
-
-typedef struct VhostUserLog {
-    uint64_t mmap_size;
-    uint64_t mmap_offset;
-} VhostUserLog;
-
-typedef struct VhostUserMsg {
-    VhostUserRequest request;
-
-#define VHOST_USER_VERSION_MASK     (0x3)
-#define VHOST_USER_REPLY_MASK       (0x1<<2)
-    uint32_t flags;
-    uint32_t size; /* the following payload size */
-    union {
-#define VHOST_USER_VRING_IDX_MASK   (0xff)
-#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
-        uint64_t u64;
-        struct vhost_vring_state state;
-        struct vhost_vring_addr addr;
-        VhostUserMemory memory;
-        VhostUserLog log;
-    } payload;
-    int fds[VHOST_MEMORY_MAX_NREGIONS];
-    int fd_num;
-} QEMU_PACKED VhostUserMsg;
-
-#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
-
-/* The version of the protocol we support */
-#define VHOST_USER_VERSION    (0x1)
-
-#define MAX_NR_VIRTQUEUE (8)
-
-typedef struct VubrDevRegion {
-    /* Guest Physical address. */
-    uint64_t gpa;
-    /* Memory region size. */
-    uint64_t size;
-    /* QEMU virtual address (userspace). */
-    uint64_t qva;
-    /* Starting offset in our mmaped space. */
-    uint64_t mmap_offset;
-    /* Start address of mmaped space. */
-    uint64_t mmap_addr;
-} VubrDevRegion;
-
-typedef struct VubrDev {
-    int sock;
-    Dispatcher dispatcher;
-    uint32_t nregions;
-    VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
-    VubrVirtq vq[MAX_NR_VIRTQUEUE];
-    int log_call_fd;
-    uint64_t log_size;
-    uint8_t *log_table;
-    int backend_udp_sock;
-    struct sockaddr_in backend_udp_dest;
-    int ready;
-    uint64_t features;
-    int hdrlen;
-} VubrDev;
-
-static const char *vubr_request_str[] = {
-    [VHOST_USER_NONE]                   =  "VHOST_USER_NONE",
-    [VHOST_USER_GET_FEATURES]           =  "VHOST_USER_GET_FEATURES",
-    [VHOST_USER_SET_FEATURES]           =  "VHOST_USER_SET_FEATURES",
-    [VHOST_USER_SET_OWNER]              =  "VHOST_USER_SET_OWNER",
-    [VHOST_USER_RESET_OWNER]           =  "VHOST_USER_RESET_OWNER",
-    [VHOST_USER_SET_MEM_TABLE]          =  "VHOST_USER_SET_MEM_TABLE",
-    [VHOST_USER_SET_LOG_BASE]           =  "VHOST_USER_SET_LOG_BASE",
-    [VHOST_USER_SET_LOG_FD]             =  "VHOST_USER_SET_LOG_FD",
-    [VHOST_USER_SET_VRING_NUM]          =  "VHOST_USER_SET_VRING_NUM",
-    [VHOST_USER_SET_VRING_ADDR]         =  "VHOST_USER_SET_VRING_ADDR",
-    [VHOST_USER_SET_VRING_BASE]         =  "VHOST_USER_SET_VRING_BASE",
-    [VHOST_USER_GET_VRING_BASE]         =  "VHOST_USER_GET_VRING_BASE",
-    [VHOST_USER_SET_VRING_KICK]         =  "VHOST_USER_SET_VRING_KICK",
-    [VHOST_USER_SET_VRING_CALL]         =  "VHOST_USER_SET_VRING_CALL",
-    [VHOST_USER_SET_VRING_ERR]          =  "VHOST_USER_SET_VRING_ERR",
-    [VHOST_USER_GET_PROTOCOL_FEATURES]  =  "VHOST_USER_GET_PROTOCOL_FEATURES",
-    [VHOST_USER_SET_PROTOCOL_FEATURES]  =  "VHOST_USER_SET_PROTOCOL_FEATURES",
-    [VHOST_USER_GET_QUEUE_NUM]          =  "VHOST_USER_GET_QUEUE_NUM",
-    [VHOST_USER_SET_VRING_ENABLE]       =  "VHOST_USER_SET_VRING_ENABLE",
-    [VHOST_USER_SEND_RARP]              =  "VHOST_USER_SEND_RARP",
-    [VHOST_USER_MAX]                    =  "VHOST_USER_MAX",
-};
-
 static void
-print_buffer(uint8_t *buf, size_t len)
+vubr_handle_tx(VuDev *dev, int qidx)
 {
-    int i;
-    printf("Raw buffer:\n");
-    for (i = 0; i < len; i++) {
-        if (i % 16 == 0) {
-            printf("\n");
-        }
-        if (i % 4 == 0) {
-            printf("   ");
-        }
-        printf("%02x ", buf[i]);
-    }
-    printf("\n............................................................\n");
-}
+    VuVirtq *vq = vu_get_queue(dev, qidx);
+    VubrDev *vubr = container_of(dev, VubrDev, vudev);
+    int hdrlen = vubr->hdrlen;
+    VuVirtqElement *elem = NULL;
 
-/* Translate guest physical address to our virtual address.  */
-static uint64_t
-gpa_to_va(VubrDev *dev, uint64_t guest_addr)
-{
-    int i;
-
-    /* Find matching memory region.  */
-    for (i = 0; i < dev->nregions; i++) {
-        VubrDevRegion *r = &dev->regions[i];
-
-        if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
-            return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
-        }
-    }
-
-    assert(!"address not found in regions");
-    return 0;
-}
-
-/* Translate qemu virtual address to our virtual address.  */
-static uint64_t
-qva_to_va(VubrDev *dev, uint64_t qemu_addr)
-{
-    int i;
+    assert(qidx % 2);
 
-    /* Find matching memory region.  */
-    for (i = 0; i < dev->nregions; i++) {
-        VubrDevRegion *r = &dev->regions[i];
+    for (;;) {
+        ssize_t ret;
+        unsigned int out_num;
+        struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
 
-        if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
-            return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
+        elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+        if (!elem) {
+            break;
         }
-    }
-
-    assert(!"address not found in regions");
-    return 0;
-}
 
-static void
-vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
-{
-    char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
-    struct iovec iov = {
-        .iov_base = (char *)vmsg,
-        .iov_len = VHOST_USER_HDR_SIZE,
-    };
-    struct msghdr msg = {
-        .msg_iov = &iov,
-        .msg_iovlen = 1,
-        .msg_control = control,
-        .msg_controllen = sizeof(control),
-    };
-    size_t fd_size;
-    struct cmsghdr *cmsg;
-    int rc;
-
-    rc = recvmsg(conn_fd, &msg, 0);
-
-    if (rc == 0) {
-        fprintf(stderr, "Peer disconnected.\n");
-        exit(1);
-    }
-    if (rc < 0) {
-        vubr_die("recvmsg");
-    }
-
-    vmsg->fd_num = 0;
-    for (cmsg = CMSG_FIRSTHDR(&msg);
-         cmsg != NULL;
-         cmsg = CMSG_NXTHDR(&msg, cmsg))
-    {
-        if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
-            fd_size = cmsg->cmsg_len - CMSG_LEN(0);
-            vmsg->fd_num = fd_size / sizeof(int);
-            memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
+        out_num = elem->out_num;
+        out_sg = elem->out_sg;
+        if (out_num < 1) {
+            fprintf(stderr, "virtio-net header not in first element\n");
             break;
         }
-    }
-
-    if (vmsg->size > sizeof(vmsg->payload)) {
-        fprintf(stderr,
-                "Error: too big message request: %d, size: vmsg->size: %u, "
-                "while sizeof(vmsg->payload) = %zu\n",
-                vmsg->request, vmsg->size, sizeof(vmsg->payload));
-        exit(1);
-    }
-
-    if (vmsg->size) {
-        rc = read(conn_fd, &vmsg->payload, vmsg->size);
-        if (rc == 0) {
-            vubr_die("recvmsg");
-            fprintf(stderr, "Peer disconnected.\n");
-            exit(1);
+        if (VHOST_USER_BRIDGE_DEBUG) {
+            iov_hexdump(out_sg, out_num, stderr, "TX:", 1024);
         }
-        if (rc < 0) {
-            vubr_die("recvmsg");
+
+        if (hdrlen) {
+            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
+                                       out_sg, out_num,
+                                       hdrlen, -1);
+            out_num = sg_num;
+            out_sg = sg;
         }
 
-        assert(rc == vmsg->size);
-    }
-}
+        struct msghdr msg = {
+            .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
+            .msg_namelen = sizeof(struct sockaddr_in),
+            .msg_iov = out_sg,
+            .msg_iovlen = out_num,
+        };
+        do {
+            ret = sendmsg(vubr->backend_udp_sock, &msg, 0);
+        } while (ret == -1 && (errno == EAGAIN || errno == EINTR));
 
-static void
-vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
-{
-    int rc;
+        if (ret == -1) {
+            vubr_die("sendmsg()");
+        }
 
-    do {
-        rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
-    } while (rc < 0 && errno == EINTR);
+        vu_queue_push(dev, vq, elem, 0);
+        vu_queue_notify(dev, vq);
 
-    if (rc < 0) {
-        vubr_die("write");
+        free(elem);
+        elem = NULL;
     }
-}
 
-static void
-vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
-{
-    int slen = sizeof(struct sockaddr_in);
-
-    if (sendto(dev->backend_udp_sock, buf, len, 0,
-               (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
-        vubr_die("sendto()");
-    }
+    free(elem);
 }
 
-static int
-vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
+static void
+iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
 {
-    int slen = sizeof(struct sockaddr_in);
-    int rc;
+    struct iovec *cur;
 
-    rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
-                  (struct sockaddr *) &dev->backend_udp_dest,
-                  (socklen_t *)&slen);
-    if (rc == -1) {
-        vubr_die("recvfrom()");
+    for (cur = front; front != iov; cur++) {
+        bytes -= cur->iov_len;
     }
 
-    return rc;
+    cur->iov_base -= bytes;
+    cur->iov_len += bytes;
 }
 
 static void
-vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
+iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
 {
-    int hdrlen = dev->hdrlen;
-    DPRINT("    hdrlen = %d\n", dev->hdrlen);
+    unsigned i;
 
-    if (VHOST_USER_BRIDGE_DEBUG) {
-        print_buffer(buf, len);
-    }
-    vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
-}
+    for (i = 0; i < iovc; i++, iov++) {
+        if (bytes < iov->iov_len) {
+            iov->iov_len = bytes;
+            return;
+        }
 
-/* Kick the log_call_fd if required. */
-static void
-vubr_log_kick(VubrDev *dev)
-{
-    if (dev->log_call_fd != -1) {
-        DPRINT("Kicking the QEMU's log...\n");
-        eventfd_write(dev->log_call_fd, 1);
+        bytes -= iov->iov_len;
     }
-}
 
-/* Kick the guest if necessary. */
-static void
-vubr_virtqueue_kick(VubrVirtq *vq)
-{
-    if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
-        DPRINT("Kicking the guest...\n");
-        eventfd_write(vq->call_fd, 1);
-    }
+    assert(!"couldn't truncate iov");
 }
 
 static void
-vubr_log_page(uint8_t *log_table, uint64_t page)
+vubr_backend_recv_cb(int sock, void *ctx)
 {
-    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
-    atomic_or(&log_table[page / 8], 1 << (page % 8));
-}
+    VubrDev *vubr = (VubrDev *) ctx;
+    VuDev *dev = &vubr->vudev;
+    VuVirtq *vq = vu_get_queue(dev, 0);
+    VuVirtqElement *elem = NULL;
+    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
+    struct virtio_net_hdr_mrg_rxbuf mhdr;
+    unsigned mhdr_cnt = 0;
+    int hdrlen = vubr->hdrlen;
+    int i = 0;
+    struct virtio_net_hdr hdr = {
+        .flags = 0,
+        .gso_type = VIRTIO_NET_HDR_GSO_NONE
+    };
 
-static void
-vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
-{
-    uint64_t page;
+    DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
+    DPRINT("    hdrlen = %d\n", hdrlen);
 
-    if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
-        !dev->log_table || !length) {
+    if (!vu_queue_enabled(dev, vq) ||
+        !vu_queue_avail_bytes(dev, vq, hdrlen, 0)) {
+        DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
         return;
     }
 
-    assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
-
-    page = address / VHOST_LOG_PAGE;
-    while (page * VHOST_LOG_PAGE < address + length) {
-        vubr_log_page(dev->log_table, page);
-        page += VHOST_LOG_PAGE;
-    }
-    vubr_log_kick(dev);
-}
-
-static void
-vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
-{
-    struct vring_desc *desc = vq->desc;
-    struct vring_avail *avail = vq->avail;
-    struct vring_used *used = vq->used;
-    uint64_t log_guest_addr = vq->log_guest_addr;
-    int32_t remaining_len = len;
-
-    unsigned int size = vq->size;
-
-    uint16_t avail_index = atomic_mb_read(&avail->idx);
-
-    /* We check the available descriptors before posting the
-     * buffer, so here we assume that enough available
-     * descriptors. */
-    assert(vq->last_avail_index != avail_index);
-    uint16_t a_index = vq->last_avail_index % size;
-    uint16_t u_index = vq->last_used_index % size;
-    uint16_t d_index = avail->ring[a_index];
-
-    int i = d_index;
-    uint32_t written_len = 0;
-
     do {
-        DPRINT("Post packet to guest on vq:\n");
-        DPRINT("    size             = %d\n", vq->size);
-        DPRINT("    last_avail_index = %d\n", vq->last_avail_index);
-        DPRINT("    last_used_index  = %d\n", vq->last_used_index);
-        DPRINT("    a_index = %d\n", a_index);
-        DPRINT("    u_index = %d\n", u_index);
-        DPRINT("    d_index = %d\n", d_index);
-        DPRINT("    desc[%d].addr    = 0x%016"PRIx64"\n", i, desc[i].addr);
-        DPRINT("    desc[%d].len     = %d\n", i, desc[i].len);
-        DPRINT("    desc[%d].flags   = %d\n", i, desc[i].flags);
-        DPRINT("    avail->idx = %d\n", avail_index);
-        DPRINT("    used->idx  = %d\n", used->idx);
-
-        if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
-            /* FIXME: we should find writable descriptor. */
-            fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
-            exit(1);
-        }
-
-        void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
-        uint32_t chunk_len = desc[i].len;
-        uint32_t chunk_write_len = MIN(remaining_len, chunk_len);
+        struct iovec *sg;
+        ssize_t ret, total = 0;
+        unsigned int num;
 
-        memcpy(chunk_start, buf + written_len, chunk_write_len);
-        vubr_log_write(dev, desc[i].addr, chunk_write_len);
-        remaining_len -= chunk_write_len;
-        written_len += chunk_write_len;
-
-        if ((remaining_len == 0) || !(desc[i].flags & VRING_DESC_F_NEXT)) {
+        elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+        if (!elem) {
             break;
         }
 
-        i = desc[i].next;
-    } while (1);
-
-    if (remaining_len > 0) {
-            fprintf(stderr,
-                    "Too long packet for RX, remaining_len = %d, Dropping...\n",
-                    remaining_len);
-            return;
-    }
-
-    /* Add descriptor to the used ring. */
-    used->ring[u_index].id = d_index;
-    used->ring[u_index].len = len;
-    vubr_log_write(dev,
-                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
-                   sizeof(used->ring[u_index]));
-
-    vq->last_avail_index++;
-    vq->last_used_index++;
-
-    atomic_mb_set(&used->idx, vq->last_used_index);
-    vubr_log_write(dev,
-                   log_guest_addr + offsetof(struct vring_used, idx),
-                   sizeof(used->idx));
-
-    /* Kick the guest if necessary. */
-    vubr_virtqueue_kick(vq);
-}
-
-static int
-vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
-{
-    struct vring_desc *desc = vq->desc;
-    struct vring_avail *avail = vq->avail;
-    struct vring_used *used = vq->used;
-    uint64_t log_guest_addr = vq->log_guest_addr;
-
-    unsigned int size = vq->size;
-
-    uint16_t a_index = vq->last_avail_index % size;
-    uint16_t u_index = vq->last_used_index % size;
-    uint16_t d_index = avail->ring[a_index];
-
-    uint32_t i, len = 0;
-    size_t buf_size = 4096;
-    uint8_t buf[4096];
-
-    DPRINT("Chunks: ");
-    i = d_index;
-    do {
-        void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
-        uint32_t chunk_len = desc[i].len;
-
-        assert(!(desc[i].flags & VRING_DESC_F_WRITE));
-
-        if (len + chunk_len < buf_size) {
-            memcpy(buf + len, chunk_start, chunk_len);
-            DPRINT("%d ", chunk_len);
-        } else {
-            fprintf(stderr, "Error: too long packet. Dropping...\n");
+        if (elem->in_num < 1) {
+            fprintf(stderr, "virtio-net contains no in buffers\n");
             break;
         }
 
-        len += chunk_len;
-
-        if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
-            break;
+        sg = elem->in_sg;
+        num = elem->in_num;
+        if (i == 0) {
+            if (hdrlen == 12) {
+                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
+                                    sg, elem->in_num,
+                                    offsetof(typeof(mhdr), num_buffers),
+                                    sizeof(mhdr.num_buffers));
+            }
+            iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
+            total += hdrlen;
+            assert(iov_discard_front(&sg, &num, hdrlen) == hdrlen);
         }
 
-        i = desc[i].next;
-    } while (1);
-    DPRINT("\n");
-
-    if (!len) {
-        return -1;
-    }
-
-    /* Add descriptor to the used ring. */
-    used->ring[u_index].id = d_index;
-    used->ring[u_index].len = len;
-    vubr_log_write(dev,
-                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
-                   sizeof(used->ring[u_index]));
-
-    vubr_consume_raw_packet(dev, buf, len);
-
-    return 0;
-}
+        struct msghdr msg = {
+            .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
+            .msg_namelen = sizeof(struct sockaddr_in),
+            .msg_iov = sg,
+            .msg_iovlen = elem->in_num,
+            .msg_flags = MSG_DONTWAIT,
+        };
+        do {
+            ret = recvmsg(vubr->backend_udp_sock, &msg, 0);
+        } while (ret == -1 && (errno == EINTR));
 
-static void
-vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
-{
-    struct vring_avail *avail = vq->avail;
-    struct vring_used *used = vq->used;
-    uint64_t log_guest_addr = vq->log_guest_addr;
-
-    while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
-        vubr_process_desc(dev, vq);
-        vq->last_avail_index++;
-        vq->last_used_index++;
-    }
+        if (i == 0) {
+            iov_restore_front(elem->in_sg, sg, hdrlen);
+        }
 
-    atomic_mb_set(&used->idx, vq->last_used_index);
-    vubr_log_write(dev,
-                   log_guest_addr + offsetof(struct vring_used, idx),
-                   sizeof(used->idx));
-}
+        if (ret == -1) {
+            if (errno == EWOULDBLOCK) {
+                vu_queue_rewind(dev, vq, 1);
+                break;
+            }
 
-static void
-vubr_backend_recv_cb(int sock, void *ctx)
-{
-    VubrDev *dev = (VubrDev *) ctx;
-    VubrVirtq *rx_vq = &dev->vq[0];
-    uint8_t buf[4096];
-    struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
-    int hdrlen = dev->hdrlen;
-    int buflen = sizeof(buf);
-    int len;
-
-    if (!dev->ready) {
-        return;
-    }
+            vubr_die("recvmsg()");
+        }
 
-    DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
-    DPRINT("    hdrlen = %d\n", hdrlen);
+        total += ret;
+        iov_truncate(elem->in_sg, elem->in_num, total);
+        vu_queue_fill(dev, vq, elem, total, i++);
 
-    uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
+        free(elem);
+        elem = NULL;
+    } while (false); /* could loop if DONTWAIT worked? */
 
-    /* If there is no available descriptors, just do nothing.
-     * The buffer will be handled by next arrived UDP packet,
-     * or next kick on receive virtq. */
-    if (rx_vq->last_avail_index == avail_index) {
-        DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
-        return;
+    if (mhdr_cnt) {
+        mhdr.num_buffers = i;
+        iov_from_buf(mhdr_sg, mhdr_cnt,
+                     0,
+                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
     }
 
-    memset(buf, 0, hdrlen);
-    /* TODO: support mergeable buffers. */
-    if (hdrlen == 12)
-        hdr->num_buffers = 1;
-    len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);
+    vu_queue_flush(dev, vq, i);
+    vu_queue_notify(dev, vq);
 
-    vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
+    free(elem);
 }
 
 static void
-vubr_kick_cb(int sock, void *ctx)
+vubr_receive_cb(int sock, void *ctx)
 {
-    VubrDev *dev = (VubrDev *) ctx;
-    eventfd_t kick_data;
-    ssize_t rc;
+    VubrDev *vubr = (VubrDev *)ctx;
 
-    rc = eventfd_read(sock, &kick_data);
-    if (rc == -1) {
-        vubr_die("eventfd_read()");
-    } else {
-        DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
-        vubr_process_avail(dev, &dev->vq[1]);
+    if (!vu_dispatch(&vubr->vudev)) {
+        fprintf(stderr, "Error while dispatching\n");
     }
 }
 
-static int
-vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
-}
+typedef struct WatchData {
+    VuDev *dev;
+    vu_watch_cb cb;
+    void *data;
+} WatchData;
 
-static int
-vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+watch_cb(int sock, void *ctx)
 {
-    vmsg->payload.u64 =
-            ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
-             (1ULL << VHOST_F_LOG_ALL) |
-             (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-             (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
-
-    vmsg->size = sizeof(vmsg->payload.u64);
+    struct WatchData *wd = ctx;
 
-    DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-
-    /* Reply */
-    return 1;
+    wd->cb(wd->dev, VU_WATCH_IN, wd->data);
 }
 
-static int
-vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_set_watch(VuDev *dev, int fd, int condition,
+               vu_watch_cb cb, void *data)
 {
-    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-
-    dev->features = vmsg->payload.u64;
-    if ((dev->features & (1ULL << VIRTIO_F_VERSION_1)) ||
-        (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
-        dev->hdrlen = 12;
-    } else {
-        dev->hdrlen = 10;
-    }
+    VubrDev *vubr = container_of(dev, VubrDev, vudev);
+    static WatchData watches[FD_SETSIZE];
+    struct WatchData *wd = &watches[fd];
 
-    return 0;
-}
-
-static int
-vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    return 0;
+    wd->cb = cb;
+    wd->data = data;
+    wd->dev = dev;
+    dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb);
 }
 
 static void
-vubr_close_log(VubrDev *dev)
+vubr_remove_watch(VuDev *dev, int fd)
 {
-    if (dev->log_table) {
-        if (munmap(dev->log_table, dev->log_size) != 0) {
-            vubr_die("munmap()");
-        }
+    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 
-        dev->log_table = 0;
-    }
-    if (dev->log_call_fd != -1) {
-        close(dev->log_call_fd);
-        dev->log_call_fd = -1;
-    }
-}
-
-static int
-vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    vubr_close_log(dev);
-    dev->ready = 0;
-    dev->features = 0;
-    return 0;
+    dispatcher_remove(&vubr->dispatcher, fd);
 }
 
 static int
-vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
+vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg)
 {
-    int i;
-    VhostUserMemory *memory = &vmsg->payload.memory;
-    dev->nregions = memory->nregions;
-
-    DPRINT("Nregions: %d\n", memory->nregions);
-    for (i = 0; i < dev->nregions; i++) {
-        void *mmap_addr;
-        VhostUserMemoryRegion *msg_region = &memory->regions[i];
-        VubrDevRegion *dev_region = &dev->regions[i];
-
-        DPRINT("Region %d\n", i);
-        DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
-               msg_region->guest_phys_addr);
-        DPRINT("    memory_size:     0x%016"PRIx64"\n",
-               msg_region->memory_size);
-        DPRINT("    userspace_addr   0x%016"PRIx64"\n",
-               msg_region->userspace_addr);
-        DPRINT("    mmap_offset      0x%016"PRIx64"\n",
-               msg_region->mmap_offset);
-
-        dev_region->gpa = msg_region->guest_phys_addr;
-        dev_region->size = msg_region->memory_size;
-        dev_region->qva = msg_region->userspace_addr;
-        dev_region->mmap_offset = msg_region->mmap_offset;
-
-        /* We don't use offset argument of mmap() since the
-         * mapped address has to be page aligned, and we use huge
-         * pages.  */
-        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
-                         PROT_READ | PROT_WRITE, MAP_SHARED,
-                         vmsg->fds[i], 0);
-
-        if (mmap_addr == MAP_FAILED) {
-            vubr_die("mmap");
-        }
-        dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
-        DPRINT("    mmap_addr:       0x%016"PRIx64"\n", dev_region->mmap_addr);
-
-        close(vmsg->fds[i]);
-    }
-
+    DPRINT("Function %s() not implemented yet.\n", __func__);
     return 0;
 }
 
 static int
-vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
 {
-    int fd;
-    uint64_t log_mmap_size, log_mmap_offset;
-    void *rc;
-
-    assert(vmsg->fd_num == 1);
-    fd = vmsg->fds[0];
-
-    assert(vmsg->size == sizeof(vmsg->payload.log));
-    log_mmap_offset = vmsg->payload.log.mmap_offset;
-    log_mmap_size = vmsg->payload.log.mmap_size;
-    DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
-    DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);
-
-    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
-              log_mmap_offset);
-    if (rc == MAP_FAILED) {
-        vubr_die("mmap");
+    switch (vmsg->request) {
+    case VHOST_USER_SEND_RARP:
+        *do_reply = vubr_send_rarp_exec(dev, vmsg);
+        return 1;
+    default:
+        /* let the library handle the rest */
+        return 0;
     }
-    dev->log_table = rc;
-    dev->log_size = log_mmap_size;
 
-    vmsg->size = sizeof(vmsg->payload.u64);
-    /* Reply */
-    return 1;
-}
-
-static int
-vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    assert(vmsg->fd_num == 1);
-    dev->log_call_fd = vmsg->fds[0];
-    DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
     return 0;
 }
 
-static int
-vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_set_features(VuDev *dev, uint64_t features)
 {
-    unsigned int index = vmsg->payload.state.index;
-    unsigned int num = vmsg->payload.state.num;
-
-    DPRINT("State.index: %d\n", index);
-    DPRINT("State.num:   %d\n", num);
-    dev->vq[index].size = num;
-    return 0;
-}
+    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 
-static int
-vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    struct vhost_vring_addr *vra = &vmsg->payload.addr;
-    unsigned int index = vra->index;
-    VubrVirtq *vq = &dev->vq[index];
-
-    DPRINT("vhost_vring_addr:\n");
-    DPRINT("    index:  %d\n", vra->index);
-    DPRINT("    flags:  %d\n", vra->flags);
-    DPRINT("    desc_user_addr:   0x%016llx\n", vra->desc_user_addr);
-    DPRINT("    used_user_addr:   0x%016llx\n", vra->used_user_addr);
-    DPRINT("    avail_user_addr:  0x%016llx\n", vra->avail_user_addr);
-    DPRINT("    log_guest_addr:   0x%016llx\n", vra->log_guest_addr);
-
-    vq->desc = (struct vring_desc *)(uintptr_t)qva_to_va(dev, vra->desc_user_addr);
-    vq->used = (struct vring_used *)(uintptr_t)qva_to_va(dev, vra->used_user_addr);
-    vq->avail = (struct vring_avail *)(uintptr_t)qva_to_va(dev, vra->avail_user_addr);
-    vq->log_guest_addr = vra->log_guest_addr;
-
-    DPRINT("Setting virtq addresses:\n");
-    DPRINT("    vring_desc  at %p\n", vq->desc);
-    DPRINT("    vring_used  at %p\n", vq->used);
-    DPRINT("    vring_avail at %p\n", vq->avail);
-
-    vq->last_used_index = vq->used->idx;
-
-    if (vq->last_avail_index != vq->used->idx) {
-        DPRINT("Last avail index != used index: %d != %d, resuming",
-               vq->last_avail_index, vq->used->idx);
-        vq->last_avail_index = vq->used->idx;
+    if ((features & (1ULL << VIRTIO_F_VERSION_1)) ||
+        (features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
+        vubr->hdrlen = 12;
+    } else {
+        vubr->hdrlen = 10;
     }
-
-    return 0;
 }
 
-static int
-vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    unsigned int index = vmsg->payload.state.index;
-    unsigned int num = vmsg->payload.state.num;
-
-    DPRINT("State.index: %d\n", index);
-    DPRINT("State.num:   %d\n", num);
-    dev->vq[index].last_avail_index = num;
-
-    return 0;
-}
-
-static int
-vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    unsigned int index = vmsg->payload.state.index;
-
-    DPRINT("State.index: %d\n", index);
-    vmsg->payload.state.num = dev->vq[index].last_avail_index;
-    vmsg->size = sizeof(vmsg->payload.state);
-    /* FIXME: this is a work-around for a bug in QEMU enabling
-     * too early vrings. When protocol features are enabled,
-     * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */
-    dev->ready = 0;
-
-    if (dev->vq[index].call_fd != -1) {
-        close(dev->vq[index].call_fd);
-        dev->vq[index].call_fd = -1;
-    }
-    if (dev->vq[index].kick_fd != -1) {
-        close(dev->vq[index].kick_fd);
-        dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
-        dev->vq[index].kick_fd = -1;
-    }
-
-    /* Reply */
-    return 1;
-}
-
-static int
-vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static uint64_t
+vubr_get_features(VuDev *dev)
 {
-    uint64_t u64_arg = vmsg->payload.u64;
-    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
-
-    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-
-    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
-    assert(vmsg->fd_num == 1);
-
-    if (dev->vq[index].kick_fd != -1) {
-        close(dev->vq[index].kick_fd);
-        dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
-    }
-    dev->vq[index].kick_fd = vmsg->fds[0];
-    DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
-
-    if (index % 2 == 1) {
-        /* TX queue. */
-        dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
-                       dev, vubr_kick_cb);
-
-        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
-               dev->vq[index].kick_fd, index);
-    }
-    /* We temporarily use this hack to determine that both TX and RX
-     * queues are set up and ready for processing.
-     * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and
-     * actual kicks. */
-    if (dev->vq[0].kick_fd != -1 &&
-        dev->vq[1].kick_fd != -1) {
-        dev->ready = 1;
-        DPRINT("vhost-user-bridge is ready for processing queues.\n");
-    }
-    return 0;
-
+    return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |
+        1ULL << VIRTIO_NET_F_MRG_RXBUF;
 }
 
-static int
-vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_queue_set_started(VuDev *dev, int qidx, bool started)
 {
-    uint64_t u64_arg = vmsg->payload.u64;
-    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
+    VuVirtq *vq = vu_get_queue(dev, qidx);
 
-    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
-    assert(vmsg->fd_num == 1);
-
-    if (dev->vq[index].call_fd != -1) {
-        close(dev->vq[index].call_fd);
+    if (qidx % 2 == 1) {
+        vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL);
     }
-    dev->vq[index].call_fd = vmsg->fds[0];
-    DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
-
-    return 0;
-}
-
-static int
-vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-    return 0;
-}
-
-static int
-vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
-    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-    vmsg->size = sizeof(vmsg->payload.u64);
-
-    /* Reply */
-    return 1;
 }
 
-static int
-vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    /* FIXME: unimplented */
-    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-    return 0;
-}
-
-static int
-vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
-}
-
-static int
-vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_panic(VuDev *dev, const char *msg)
 {
-    unsigned int index = vmsg->payload.state.index;
-    unsigned int enable = vmsg->payload.state.num;
+    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 
-    DPRINT("State.index: %d\n", index);
-    DPRINT("State.enable:   %d\n", enable);
-    dev->vq[index].enable = enable;
-    return 0;
-}
+    fprintf(stderr, "PANIC: %s\n", msg);
 
-static int
-vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
+    dispatcher_remove(&vubr->dispatcher, dev->sock);
+    vubr->quit = 1;
 }
 
-static int
-vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    /* Print out generic part of the request. */
-    DPRINT(
-           "==================   Vhost user message from QEMU   ==================\n");
-    DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
-           vmsg->request);
-    DPRINT("Flags:   0x%x\n", vmsg->flags);
-    DPRINT("Size:    %d\n", vmsg->size);
-
-    if (vmsg->fd_num) {
-        int i;
-        DPRINT("Fds:");
-        for (i = 0; i < vmsg->fd_num; i++) {
-            DPRINT(" %d", vmsg->fds[i]);
-        }
-        DPRINT("\n");
-    }
-
-    switch (vmsg->request) {
-    case VHOST_USER_NONE:
-        return vubr_none_exec(dev, vmsg);
-    case VHOST_USER_GET_FEATURES:
-        return vubr_get_features_exec(dev, vmsg);
-    case VHOST_USER_SET_FEATURES:
-        return vubr_set_features_exec(dev, vmsg);
-    case VHOST_USER_SET_OWNER:
-        return vubr_set_owner_exec(dev, vmsg);
-    case VHOST_USER_RESET_OWNER:
-        return vubr_reset_device_exec(dev, vmsg);
-    case VHOST_USER_SET_MEM_TABLE:
-        return vubr_set_mem_table_exec(dev, vmsg);
-    case VHOST_USER_SET_LOG_BASE:
-        return vubr_set_log_base_exec(dev, vmsg);
-    case VHOST_USER_SET_LOG_FD:
-        return vubr_set_log_fd_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_NUM:
-        return vubr_set_vring_num_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_ADDR:
-        return vubr_set_vring_addr_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_BASE:
-        return vubr_set_vring_base_exec(dev, vmsg);
-    case VHOST_USER_GET_VRING_BASE:
-        return vubr_get_vring_base_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_KICK:
-        return vubr_set_vring_kick_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_CALL:
-        return vubr_set_vring_call_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_ERR:
-        return vubr_set_vring_err_exec(dev, vmsg);
-    case VHOST_USER_GET_PROTOCOL_FEATURES:
-        return vubr_get_protocol_features_exec(dev, vmsg);
-    case VHOST_USER_SET_PROTOCOL_FEATURES:
-        return vubr_set_protocol_features_exec(dev, vmsg);
-    case VHOST_USER_GET_QUEUE_NUM:
-        return vubr_get_queue_num_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_ENABLE:
-        return vubr_set_vring_enable_exec(dev, vmsg);
-    case VHOST_USER_SEND_RARP:
-        return vubr_send_rarp_exec(dev, vmsg);
-
-    case VHOST_USER_MAX:
-        assert(vmsg->request != VHOST_USER_MAX);
-    }
-    return 0;
-}
-
-static void
-vubr_receive_cb(int sock, void *ctx)
-{
-    VubrDev *dev = (VubrDev *) ctx;
-    VhostUserMsg vmsg;
-    int reply_requested;
-
-    vubr_message_read(sock, &vmsg);
-    reply_requested = vubr_execute_request(dev, &vmsg);
-    if (reply_requested) {
-        /* Set the version in the flags when sending the reply */
-        vmsg.flags &= ~VHOST_USER_VERSION_MASK;
-        vmsg.flags |= VHOST_USER_VERSION;
-        vmsg.flags |= VHOST_USER_REPLY_MASK;
-        vubr_message_write(sock, &vmsg);
-    }
-}
+static const VuDevIface vuiface = {
+    .get_features = vubr_get_features,
+    .set_features = vubr_set_features,
+    .process_msg = vubr_process_msg,
+    .queue_set_started = vubr_queue_set_started,
+};
 
 static void
 vubr_accept_cb(int sock, void *ctx)
@@ -1199,6 +479,14 @@ vubr_accept_cb(int sock, void *ctx)
         vubr_die("accept()");
     }
     DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
+
+    vu_init(&dev->vudev,
+            conn_fd,
+            vubr_panic,
+            vubr_set_watch,
+            vubr_remove_watch,
+            &vuiface);
+
     dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
     dispatcher_remove(&dev->dispatcher, sock);
 }
@@ -1207,29 +495,10 @@ static VubrDev *
 vubr_new(const char *path, bool client)
 {
     VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
-    dev->nregions = 0;
-    int i;
     struct sockaddr_un un;
     CallbackFunc cb;
     size_t len;
 
-    for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
-        dev->vq[i] = (VubrVirtq) {
-            .call_fd = -1, .kick_fd = -1,
-            .size = 0,
-            .last_avail_index = 0, .last_used_index = 0,
-            .desc = 0, .avail = 0, .used = 0,
-            .enable = 0,
-        };
-    }
-
-    /* Init log */
-    dev->log_call_fd = -1;
-    dev->log_size = 0;
-    dev->log_table = 0;
-    dev->ready = 0;
-    dev->features = 0;
-
     /* Get a UNIX socket. */
     dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
     if (dev->sock == -1) {
@@ -1257,10 +526,17 @@ vubr_new(const char *path, bool client)
         if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
             vubr_die("connect");
         }
+        vu_init(&dev->vudev,
+                dev->sock,
+                vubr_panic,
+                vubr_set_watch,
+                vubr_remove_watch,
+                &vuiface);
         cb = vubr_receive_cb;
     }
 
     dispatcher_init(&dev->dispatcher);
+
     dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
 
     return dev;
@@ -1341,7 +617,7 @@ vubr_backend_udp_setup(VubrDev *dev,
 static void
 vubr_run(VubrDev *dev)
 {
-    while (1) {
+    while (!dev->quit) {
         /* timeout 200ms */
         dispatcher_wait(&dev->dispatcher, 200000);
         /* Here one can try polling strategy. */
@@ -1417,6 +693,9 @@ main(int argc, char *argv[])
 
     vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
     vubr_run(dev);
+
+    vu_deinit(&dev->vudev);
+
     return 0;
 
 out:
diff --git a/tests/Makefile.include b/tests/Makefile.include
index de51634..48cba69 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -685,7 +685,7 @@ tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y)
 tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y)
 tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y)
 tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y)
-tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o
+tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y)
 tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y)
 tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o
 
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 07/47] intel_iommu: fixing source id during IOTLB hash key calculation
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (5 preceding siblings ...)
  2016-11-10 16:12 ` [Qemu-devel] [PULL 06/47] tests/vhost-user-bridge: use contrib/libvhost-user Michael S. Tsirkin
@ 2016-11-10 16:12 ` Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 08/47] virtio: rename virtqueue_discard to virtqueue_unpop Michael S. Tsirkin
                   ` (40 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:12 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Jason Wang, Paolo Bonzini, Richard Henderson,
	Eduardo Habkost

From: Jason Wang <jasowang@redhat.com>

Using uint8_t for source id will lose bus num and get the
wrong/invalid IOTLB entry. Fixing by using uint16_t instead and
enlarge level shift.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/intel_iommu_internal.h | 2 +-
 hw/i386/intel_iommu.c          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 0829a50..11abfa2 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -115,7 +115,7 @@
 
 /* The shift of source_id in the key of IOTLB hash table */
 #define VTD_IOTLB_SID_SHIFT         36
-#define VTD_IOTLB_LVL_SHIFT         44
+#define VTD_IOTLB_LVL_SHIFT         52
 #define VTD_IOTLB_MAX_SIZE          1024    /* Max size of the hash table */
 
 /* IOTLB_REG */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1655a65..5a12ae7 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -218,7 +218,7 @@ static void vtd_reset_iotlb(IntelIOMMUState *s)
     g_hash_table_remove_all(s->iotlb);
 }
 
-static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint8_t source_id,
+static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
                                   uint32_t level)
 {
     return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) |
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 08/47] virtio: rename virtqueue_discard to virtqueue_unpop
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (6 preceding siblings ...)
  2016-11-10 16:12 ` [Qemu-devel] [PULL 07/47] intel_iommu: fixing source id during IOTLB hash key calculation Michael S. Tsirkin
@ 2016-11-10 16:12 ` Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 09/47] virtio: make virtqueue_alloc_element static Michael S. Tsirkin
                   ` (39 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:12 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Ladi Prosek, Stefan Hajnoczi, Jason Wang

From: Ladi Prosek <lprosek@redhat.com>

The function undoes the effect of virtqueue_pop and doesn't do anything
destructive or irreversible so virtqueue_unpop is a more fitting name.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/virtio/virtio.h | 4 ++--
 hw/net/virtio-net.c        | 2 +-
 hw/virtio/virtio-balloon.c | 2 +-
 hw/virtio/virtio.c         | 8 ++++----
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index ac65d6a..6a2f57c 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -160,8 +160,8 @@ void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 void virtqueue_flush(VirtQueue *vq, unsigned int count);
 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len);
-void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
-                       unsigned int len);
+void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
+                     unsigned int len);
 bool virtqueue_rewind(VirtQueue *vq, unsigned int num);
 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len, unsigned int idx);
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 06bfe4b..20aa63e 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1177,7 +1177,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
          * must have consumed the complete packet.
          * Otherwise, drop it. */
         if (!n->mergeable_rx_bufs && offset < size) {
-            virtqueue_discard(q->rx_vq, elem, total);
+            virtqueue_unpop(q->rx_vq, elem, total);
             g_free(elem);
             return size;
         }
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index cfba053..884570a 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -456,7 +456,7 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev)
     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
 
     if (s->stats_vq_elem != NULL) {
-        virtqueue_discard(s->svq, s->stats_vq_elem, 0);
+        virtqueue_unpop(s->svq, s->stats_vq_elem, 0);
         g_free(s->stats_vq_elem);
         s->stats_vq_elem = NULL;
     }
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index bcbcfe0..3a76dc6 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -279,7 +279,7 @@ void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
     virtqueue_unmap_sg(vq, elem, len);
 }
 
-/* virtqueue_discard:
+/* virtqueue_unpop:
  * @vq: The #VirtQueue
  * @elem: The #VirtQueueElement
  * @len: number of bytes written
@@ -287,8 +287,8 @@ void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
  * Pretend the most recent element wasn't popped from the virtqueue.  The next
  * call to virtqueue_pop() will refetch the element.
  */
-void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
-                       unsigned int len)
+void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
+                     unsigned int len)
 {
     vq->last_avail_idx--;
     virtqueue_detach_element(vq, elem, len);
@@ -301,7 +301,7 @@ void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
  * Pretend that elements weren't popped from the virtqueue.  The next
  * virtqueue_pop() will refetch the oldest element.
  *
- * Use virtqueue_discard() instead if you have a VirtQueueElement.
+ * Use virtqueue_unpop() instead if you have a VirtQueueElement.
  *
  * Returns: true on success, false if @num is greater than the number of in use
  * elements.
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 09/47] virtio: make virtqueue_alloc_element static
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (7 preceding siblings ...)
  2016-11-10 16:12 ` [Qemu-devel] [PULL 08/47] virtio: rename virtqueue_discard to virtqueue_unpop Michael S. Tsirkin
@ 2016-11-10 16:12 ` Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 10/47] pcie_aer: Convert pcie_aer_init to Error Michael S. Tsirkin
                   ` (38 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:12 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Ladi Prosek, Stefan Hajnoczi

From: Ladi Prosek <lprosek@redhat.com>

The function does not fully initialize the returned VirtQueueElement and should
be used only internally from the virtio module.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/virtio/virtio.h | 1 -
 hw/virtio/virtio.c         | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 6a2f57c..f12a1a8 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -154,7 +154,6 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
 
 void virtio_del_queue(VirtIODevice *vdev, int n);
 
-void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num);
 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len);
 void virtqueue_flush(VirtQueue *vq, unsigned int count);
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 3a76dc6..1df5f4e 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -632,7 +632,7 @@ void virtqueue_map(VirtQueueElement *elem)
                         VIRTQUEUE_MAX_SIZE, 0);
 }
 
-void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
+static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
 {
     VirtQueueElement *elem;
     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 10/47] pcie_aer: Convert pcie_aer_init to Error
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (8 preceding siblings ...)
  2016-11-10 16:12 ` [Qemu-devel] [PULL 09/47] virtio: make virtqueue_alloc_element static Michael S. Tsirkin
@ 2016-11-10 16:12 ` Michael S. Tsirkin
  2016-11-10 16:12 ` [Qemu-devel] [PULL 11/47] virtio-crypto: tag as not hotpluggable and migration Michael S. Tsirkin
                   ` (37 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:12 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Cao jin, Marcel Apfelbaum, Dmitry Fleytman, Jason Wang

From: Cao jin <caoj.fnst@cn.fujitsu.com>

When user specify invalid property aer_log_max, device should fail to
create, and report appropriate message.

Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Acked-by: Dmitry Fleytman <dmitry@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/pci/pcie_aer.h          |  3 ++-
 hw/net/e1000e.c                    |  2 +-
 hw/pci-bridge/ioh3420.c            |  3 ++-
 hw/pci-bridge/xio3130_downstream.c |  3 ++-
 hw/pci-bridge/xio3130_upstream.c   |  3 ++-
 hw/pci/pcie_aer.c                  | 17 +++++++----------
 6 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index c2ee4e2..1cce61a 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -87,7 +87,8 @@ struct PCIEAERErr {
 
 extern const VMStateDescription vmstate_pcie_aer_log;
 
-int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size);
+int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size,
+                  Error **errp);
 void pcie_aer_exit(PCIDevice *dev);
 void pcie_aer_write_config(PCIDevice *dev,
                            uint32_t addr, uint32_t val, int len);
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index 4994e1c..89f96eb 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -472,7 +472,7 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp)
         hw_error("Failed to initialize PM capability");
     }
 
-    if (pcie_aer_init(pci_dev, e1000e_aer_offset, PCI_ERR_SIZEOF) < 0) {
+    if (pcie_aer_init(pci_dev, e1000e_aer_offset, PCI_ERR_SIZEOF, NULL) < 0) {
         hw_error("Failed to initialize AER capability");
     }
 
diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index c8b5ac4..04180af 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -135,8 +135,9 @@ static int ioh3420_initfn(PCIDevice *d)
         goto err_pcie_cap;
     }
 
-    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
+    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF, &err);
     if (rc < 0) {
+        error_report_err(err);
         goto err;
     }
     pcie_aer_root_init(d);
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index cef6e13..5713341 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -97,8 +97,9 @@ static int xio3130_downstream_initfn(PCIDevice *d)
         goto err_pcie_cap;
     }
 
-    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
+    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF, &err);
     if (rc < 0) {
+        error_report_err(err);
         goto err;
     }
 
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index 4ad0440..94c1691 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -85,8 +85,9 @@ static int xio3130_upstream_initfn(PCIDevice *d)
     pcie_cap_flr_init(d);
     pcie_cap_deverr_init(d);
 
-    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
+    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF, &err);
     if (rc < 0) {
+        error_report_err(err);
         goto err;
     }
 
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 048ce6a..2a4bd5a 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -29,6 +29,7 @@
 #include "hw/pci/msi.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/pci/pcie_regs.h"
+#include "qapi/error.h"
 
 //#define DEBUG_PCIE
 #ifdef DEBUG_PCIE
@@ -96,21 +97,17 @@ static void aer_log_clear_all_err(PCIEAERLog *aer_log)
     aer_log->log_num = 0;
 }
 
-int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size)
+int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size,
+                  Error **errp)
 {
-    PCIExpressDevice *exp;
-
     pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
                         offset, size);
-    exp = &dev->exp;
-    exp->aer_cap = offset;
+    dev->exp.aer_cap = offset;
 
-    /* log_max is property */
-    if (dev->exp.aer_log.log_max == PCIE_AER_LOG_MAX_UNSET) {
-        dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
-    }
-    /* clip down the value to avoid unreasobale memory usage */
+    /* clip down the value to avoid unreasonable memory usage */
     if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
+        error_setg(errp, "Invalid aer_log_max %d. The max number of aer log "
+                "is %d", dev->exp.aer_log.log_max, PCIE_AER_LOG_MAX_LIMIT);
         return -EINVAL;
     }
     dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 11/47] virtio-crypto: tag as not hotpluggable and migration
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (9 preceding siblings ...)
  2016-11-10 16:12 ` [Qemu-devel] [PULL 10/47] pcie_aer: Convert pcie_aer_init to Error Michael S. Tsirkin
@ 2016-11-10 16:12 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 12/47] intel_iommu: fix several incorrect endianess and bit fields Michael S. Tsirkin
                   ` (36 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:12 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Gonglei

From: Gonglei <arei.gonglei@huawei.com>

Currently the virtio-crypto device hasn't supported
hotpluggable and live migration well. Let's tag it
as not hotpluggable and migration actively and reopen
them once we support them well.

Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/virtio-crypto-pci.c | 2 +-
 hw/virtio/virtio-crypto.c     | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c
index 21d9984..a1b0906 100644
--- a/hw/virtio/virtio-crypto-pci.c
+++ b/hw/virtio/virtio-crypto-pci.c
@@ -48,7 +48,7 @@ static void virtio_crypto_pci_class_init(ObjectClass *klass, void *data)
     k->realize = virtio_crypto_pci_realize;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
     dc->props = virtio_crypto_pci_properties;
-
+    dc->hotpluggable = false;
     pcidev_k->class_id = PCI_CLASS_OTHERS;
 }
 
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index 170114f..3293843 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -813,6 +813,7 @@ static void virtio_crypto_device_unrealize(DeviceState *dev, Error **errp)
 
 static const VMStateDescription vmstate_virtio_crypto = {
     .name = "virtio-crypto",
+    .unmigratable = 1,
     .minimum_version_id = VIRTIO_CRYPTO_VM_VERSION,
     .version_id = VIRTIO_CRYPTO_VM_VERSION,
     .fields = (VMStateField[]) {
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 12/47] intel_iommu: fix several incorrect endianess and bit fields
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (10 preceding siblings ...)
  2016-11-10 16:12 ` [Qemu-devel] [PULL 11/47] virtio-crypto: tag as not hotpluggable and migration Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 13/47] intel_iommu: fix incorrect assert Michael S. Tsirkin
                   ` (35 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Peter Xu, Paolo Bonzini, Richard Henderson,
	Eduardo Habkost

From: Peter Xu <peterx@redhat.com>

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/i386/intel_iommu.h | 9 ++++-----
 hw/i386/intel_iommu.c         | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 1989c1e..405c9d1 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -123,7 +123,6 @@ enum {
 union VTD_IR_TableEntry {
     struct {
 #ifdef HOST_WORDS_BIGENDIAN
-        uint32_t dest_id:32;         /* Destination ID */
         uint32_t __reserved_1:8;     /* Reserved 1 */
         uint32_t vector:8;           /* Interrupt Vector */
         uint32_t irte_mode:1;        /* IRTE Mode */
@@ -147,9 +146,9 @@ union VTD_IR_TableEntry {
         uint32_t irte_mode:1;        /* IRTE Mode */
         uint32_t vector:8;           /* Interrupt Vector */
         uint32_t __reserved_1:8;     /* Reserved 1 */
-        uint32_t dest_id:32;         /* Destination ID */
 #endif
-        uint16_t source_id:16;       /* Source-ID */
+        uint32_t dest_id;            /* Destination ID */
+        uint16_t source_id;          /* Source-ID */
 #ifdef HOST_WORDS_BIGENDIAN
         uint64_t __reserved_2:44;    /* Reserved 2 */
         uint64_t sid_vtype:2;        /* Source-ID Validation Type */
@@ -220,7 +219,7 @@ struct VTD_MSIMessage {
             uint32_t dest:8;
             uint32_t __addr_head:12; /* 0xfee */
 #endif
-            uint32_t __addr_hi:32;
+            uint32_t __addr_hi;
         } QEMU_PACKED;
         uint64_t msi_addr;
     };
@@ -239,7 +238,7 @@ struct VTD_MSIMessage {
             uint16_t level:1;
             uint16_t trigger_mode:1;
 #endif
-            uint16_t __resved1:16;
+            uint16_t __resved1;
         } QEMU_PACKED;
         uint32_t msi_data;
     };
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 5a12ae7..20c4d2c 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2180,7 +2180,7 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
     }
 
     addr.data = origin->address & VTD_MSI_ADDR_LO_MASK;
-    if (le16_to_cpu(addr.addr.__head) != 0xfee) {
+    if (addr.addr.__head != 0xfee) {
         VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: "
                     "0x%"PRIx32, addr.data);
         return -VTD_FR_IR_REQ_RSVD;
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 13/47] intel_iommu: fix incorrect assert
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (11 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 12/47] intel_iommu: fix several incorrect endianess and bit fields Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 14/47] acpi: fix DMAR device scope for IOAPIC Michael S. Tsirkin
                   ` (34 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Peter Xu, Paolo Bonzini, Richard Henderson,
	Eduardo Habkost

From: Peter Xu <peterx@redhat.com>

Reported-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/intel_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 20c4d2c..1b706ad 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2463,7 +2463,7 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     IntelIOMMUState *s = opaque;
     VTDAddressSpace *vtd_as;
 
-    assert(0 <= devfn && devfn <= X86_IOMMU_PCI_DEVFN_MAX);
+    assert(0 <= devfn && devfn < X86_IOMMU_PCI_DEVFN_MAX);
 
     vtd_as = vtd_find_add_as(s, bus, devfn);
     return &vtd_as->as;
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 14/47] acpi: fix DMAR device scope for IOAPIC
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (12 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 13/47] intel_iommu: fix incorrect assert Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 15/47] virtio: allow per-device-class legacy features Michael S. Tsirkin
                   ` (33 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Peter Xu, Igor Mammedov, Paolo Bonzini,
	Richard Henderson, Eduardo Habkost

From: Peter Xu <peterx@redhat.com>

We should not use cpu_to_le16() here, instead each of device/function
value is stored in a 8 byte field.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/acpi/acpi-defs.h | 5 ++++-
 hw/i386/acpi-build.c        | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index d1d1d61..154f3b8 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -619,7 +619,10 @@ struct AcpiDmarDeviceScope {
     uint16_t reserved;
     uint8_t enumeration_id;
     uint8_t bus;
-    uint16_t path[0];           /* list of dev:func pairs */
+    struct {
+        uint8_t device;
+        uint8_t function;
+    } path[0];
 } QEMU_PACKED;
 typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope;
 
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index c02f408..a155857 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2605,7 +2605,8 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
     scope->length = ioapic_scope_size;
     scope->enumeration_id = ACPI_BUILD_IOAPIC_ID;
     scope->bus = Q35_PSEUDO_BUS_PLATFORM;
-    scope->path[0] = cpu_to_le16(Q35_PSEUDO_DEVFN_IOAPIC);
+    scope->path[0].device = PCI_SLOT(Q35_PSEUDO_DEVFN_IOAPIC);
+    scope->path[0].function = PCI_FUNC(Q35_PSEUDO_DEVFN_IOAPIC);
 
     build_header(linker, table_data, (void *)(table_data->data + dmar_start),
                  "DMAR", table_data->len - dmar_start, 1, NULL, NULL);
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 15/47] virtio: allow per-device-class legacy features
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (13 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 14/47] acpi: fix DMAR device scope for IOAPIC Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 16/47] virtio-net: mark VIRTIO_NET_F_GSO as legacy Michael S. Tsirkin
                   ` (32 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, qemu-stable, #,
	dependency, for, the, next, patch, Cornelia Huck,
	Christian Borntraeger, Richard Henderson, Alexander Graf

Legacy features are those that transitional devices only
expose on the legacy interface.
Allow different ones per device class.

Cc: qemu-stable@nongnu.org # dependency for the next patch
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 include/hw/virtio/virtio.h | 5 +++++
 hw/s390x/virtio-ccw.c      | 4 +++-
 hw/virtio/virtio-pci.c     | 4 +++-
 hw/virtio/virtio.c         | 2 ++
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index f12a1a8..bdb3c4b 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -113,6 +113,11 @@ typedef struct VirtioDeviceClass {
     void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
     void (*reset)(VirtIODevice *vdev);
     void (*set_status)(VirtIODevice *vdev, uint8_t val);
+    /* For transitional devices, this is a bitmap of features
+     * that are only exposed on the legacy interface but not
+     * the modern one.
+     */
+    uint64_t legacy_features;
     /* Test and clear event pending status.
      * Should be called after unmask to avoid losing events.
      * If backend does not support masking,
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 7d7f8f6..f5c1d98 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -303,6 +303,8 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
         if (!ccw.cda) {
             ret = -EFAULT;
         } else {
+            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+
             features.index = address_space_ldub(&address_space_memory,
                                                 ccw.cda
                                                 + sizeof(features.features),
@@ -312,7 +314,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
                 if (dev->revision >= 1) {
                     /* Don't offer legacy features for modern devices. */
                     features.features = (uint32_t)
-                        (vdev->host_features & ~VIRTIO_LEGACY_FEATURES);
+                        (vdev->host_features & ~vdc->legacy_features);
                 } else {
                     features.features = (uint32_t)vdev->host_features;
                 }
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 62001b4..97b32fe 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1175,7 +1175,9 @@ static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
         break;
     case VIRTIO_PCI_COMMON_DF:
         if (proxy->dfselect <= 1) {
-            val = (vdev->host_features & ~VIRTIO_LEGACY_FEATURES) >>
+            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+
+            val = (vdev->host_features & ~vdc->legacy_features) >>
                 (32 * proxy->dfselect);
         }
         break;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 1df5f4e..72ee06b 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2214,6 +2214,8 @@ static void virtio_device_class_init(ObjectClass *klass, void *data)
     dc->props = virtio_properties;
     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
+
+    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
 }
 
 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 16/47] virtio-net: mark VIRTIO_NET_F_GSO as legacy
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (14 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 15/47] virtio: allow per-device-class legacy features Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 17/47] vhost: Update 'ioeventfd_started' with host notifiers Michael S. Tsirkin
                   ` (31 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, qemu-stable, Cornelia Huck, Jason Wang

virtio 1.0 spec says this is a legacy feature bit,
hide it from guests in modern mode.

Note: for cross-version migration compatibility,
we keep the bit set in host_features.
The result will be that a guest migrating cross-version
will see host features change under it.
As guests only seem to read it once, this should
not be an issue. Meanwhile, will work to fix guests to
ignore this bit in virtio1 mode, too.

Cc: qemu-stable@nongnu.org
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 hw/net/virtio-net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 20aa63e..b68c69d 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1942,6 +1942,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data)
     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
     vdc->load = virtio_net_load_device;
     vdc->save = virtio_net_save_device;
+    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
 }
 
 static const TypeInfo virtio_net_info = {
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 17/47] vhost: Update 'ioeventfd_started' with host notifiers
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (15 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 16/47] virtio-net: mark VIRTIO_NET_F_GSO as legacy Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 18/47] vhost: Use vbus var instead of VIRTIO_BUS() macro Michael S. Tsirkin
                   ` (30 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Felipe Franciosi, Christian Borntraeger

From: Felipe Franciosi <felipe@nutanix.com>

Following the recent refactoring of virtio notifiers [1], more specifically
the patch
ed08a2a0b ("virtio: use virtio_bus_set_host_notifier to start/stop ioeventfd")
that uses virtio_bus_set_host_notifier [2] by default, core
virtio code requires 'ioeventfd_started' to be set to true/false when
the host notifiers are configured. Because not all vhost devices were
update (eg. vhost-scsi) to use the new interface, this value is always
set to false.

When booting a guest with a vhost-scsi backend controller, SeaBIOS will
initially configure the device which sets all notifiers. The guest will
continue to boot fine until the kernel virtio-scsi driver reinitialises
the device causing a stop followed by another start. Since
ioeventfd_started was never set to true, the 'stop' operation triggered
by virtio_bus_set_host_notifier() will not result in a call to
virtio_pci_ioeventfd_assign(assign=false). This leaves the memory
regions with stale notifiers and results on the next start triggering
the following assertion:

  kvm_mem_ioeventfd_add: error adding ioeventfd: File exists
  Aborted

This patch updates ioeventfd_started whenever the notifiers are set or
cleared, fixing this issue.

Signed-off-by: Felipe Franciosi <felipe@nutanix.com>

[1] http://lists.nongnu.org/archive/html/qemu-devel/2016-10/msg07748.html
[2] http://lists.nongnu.org/archive/html/qemu-devel/2016-10/msg07760.html

Fixes: ed08a2a0b ("virtio: use virtio_bus_set_host_notifier to start/stop ioeventfd")
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 hw/virtio/vhost.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 131f164..1290963 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1205,6 +1205,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
             goto fail_vq;
         }
     }
+    VIRTIO_BUS(qbus)->ioeventfd_started = true;
 
     return 0;
 fail_vq:
@@ -1239,6 +1240,7 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
         }
         assert (r >= 0);
     }
+    VIRTIO_BUS(qbus)->ioeventfd_started = false;
     virtio_device_start_ioeventfd(vdev);
 }
 
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 18/47] vhost: Use vbus var instead of VIRTIO_BUS() macro
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (16 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 17/47] vhost: Update 'ioeventfd_started' with host notifiers Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 19/47] vhost: migration blocker only if shared log is used Michael S. Tsirkin
                   ` (29 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Felipe Franciosi

From: Felipe Franciosi <felipe@nutanix.com>

Recent changes on vhost_dev_enable/disable_notifiers() produced a
VirtioBusState vbus variable which can be used instead of the
VIRTIO_BUS() macro. This commit just makes the code a little bit cleaner
and more consistent.

Signed-off-by: Felipe Franciosi <felipe@nutanix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/vhost.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 1290963..7d29dad 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1198,20 +1198,18 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
 
     virtio_device_stop_ioeventfd(vdev);
     for (i = 0; i < hdev->nvqs; ++i) {
-        r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
-                                         true);
+        r = virtio_bus_set_host_notifier(vbus, hdev->vq_index + i, true);
         if (r < 0) {
             error_report("vhost VQ %d notifier binding failed: %d", i, -r);
             goto fail_vq;
         }
     }
-    VIRTIO_BUS(qbus)->ioeventfd_started = true;
+    vbus->ioeventfd_started = true;
 
     return 0;
 fail_vq:
     while (--i >= 0) {
-        e = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
-                                         false);
+        e = virtio_bus_set_host_notifier(vbus, hdev->vq_index + i, false);
         if (e < 0) {
             error_report("vhost VQ %d notifier cleanup error: %d", i, -r);
         }
@@ -1230,17 +1228,17 @@ fail:
 void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
 {
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusState *vbus = VIRTIO_BUS(qbus);
     int i, r;
 
     for (i = 0; i < hdev->nvqs; ++i) {
-        r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
-                                         false);
+        r = virtio_bus_set_host_notifier(vbus, hdev->vq_index + i, false);
         if (r < 0) {
             error_report("vhost VQ %d notifier cleanup failed: %d", i, -r);
         }
         assert (r >= 0);
     }
-    VIRTIO_BUS(qbus)->ioeventfd_started = false;
+    vbus->ioeventfd_started = false;
     virtio_device_start_ioeventfd(vdev);
 }
 
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 19/47] vhost: migration blocker only if shared log is used
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (17 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 18/47] vhost: Use vbus var instead of VIRTIO_BUS() macro Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 20/47] qdev: hotplug: drop HotplugHandler.post_plug callback Michael S. Tsirkin
                   ` (28 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Rafael David Tinoco, Marc-André Lureau

From: Rafael David Tinoco <rafael.tinoco@canonical.com>

Commit 31190ed7 added a migration blocker in vhost_dev_init() to
check if memfd would succeed. It is better if this blocker first
checks if vhost backend requires shared log. This will avoid a
situation where a blocker is added inappropriately (e.g. shared
log allocation fails when vhost backend doesn't support it).

Signed-off-by: Rafael David Tinoco <rafael.tinoco@canonical.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/vhost.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 7d29dad..cf122bb 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1122,7 +1122,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
         if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
             error_setg(&hdev->migration_blocker,
                        "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
-        } else if (!qemu_memfd_check()) {
+        } else if (vhost_dev_log_is_shared(hdev) && !qemu_memfd_check()) {
             error_setg(&hdev->migration_blocker,
                        "Migration disabled: failed to allocate shared memory");
         }
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 20/47] qdev: hotplug: drop HotplugHandler.post_plug callback
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (18 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 19/47] vhost: migration blocker only if shared log is used Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 21/47] nvdimm acpi: drop the lock of fit buffer Michael S. Tsirkin
                   ` (27 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

as nvdimm acpi is okay to build fit when the nvdimm device
has not been 'realized'

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 include/hw/hotplug.h | 11 -----------
 hw/acpi/nvdimm.c     |  6 +-----
 hw/core/hotplug.c    | 11 -----------
 hw/core/qdev.c       | 20 ++++----------------
 hw/i386/pc.c         | 23 ++++-------------------
 5 files changed, 9 insertions(+), 62 deletions(-)

diff --git a/include/hw/hotplug.h b/include/hw/hotplug.h
index 10ca5b6..1a0516a 100644
--- a/include/hw/hotplug.h
+++ b/include/hw/hotplug.h
@@ -47,7 +47,6 @@ typedef void (*hotplug_fn)(HotplugHandler *plug_handler,
  * @parent: Opaque parent interface.
  * @pre_plug: pre plug callback called at start of device.realize(true)
  * @plug: plug callback called at end of device.realize(true).
- * @post_pug: post plug callback called after device is successfully plugged.
  * @unplug_request: unplug request callback.
  *                  Used as a means to initiate device unplug for devices that
  *                  require asynchronous unplug handling.
@@ -62,7 +61,6 @@ typedef struct HotplugHandlerClass {
     /* <public> */
     hotplug_fn pre_plug;
     hotplug_fn plug;
-    hotplug_fn post_plug;
     hotplug_fn unplug_request;
     hotplug_fn unplug;
 } HotplugHandlerClass;
@@ -86,15 +84,6 @@ void hotplug_handler_pre_plug(HotplugHandler *plug_handler,
                               Error **errp);
 
 /**
- * hotplug_handler_post_plug:
- *
- * Call #HotplugHandlerClass.post_plug callback of @plug_handler.
- */
-void hotplug_handler_post_plug(HotplugHandler *plug_handler,
-                               DeviceState *plugged_dev,
-                               Error **errp);
-
-/**
  * hotplug_handler_unplug_request:
  *
  * Calls #HotplugHandlerClass.unplug_request callback of @plug_handler.
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 602ec54..623bb36 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -38,11 +38,7 @@ static int nvdimm_plugged_device_list(Object *obj, void *opaque)
     GSList **list = opaque;
 
     if (object_dynamic_cast(obj, TYPE_NVDIMM)) {
-        DeviceState *dev = DEVICE(obj);
-
-        if (dev->realized) { /* only realized NVDIMMs matter */
-            *list = g_slist_append(*list, DEVICE(obj));
-        }
+        *list = g_slist_append(*list, DEVICE(obj));
     }
 
     object_child_foreach(obj, nvdimm_plugged_device_list, opaque);
diff --git a/hw/core/hotplug.c b/hw/core/hotplug.c
index ab34c19..17ac986 100644
--- a/hw/core/hotplug.c
+++ b/hw/core/hotplug.c
@@ -35,17 +35,6 @@ void hotplug_handler_plug(HotplugHandler *plug_handler,
     }
 }
 
-void hotplug_handler_post_plug(HotplugHandler *plug_handler,
-                               DeviceState *plugged_dev,
-                               Error **errp)
-{
-    HotplugHandlerClass *hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler);
-
-    if (hdc->post_plug) {
-        hdc->post_plug(plug_handler, plugged_dev, errp);
-    }
-}
-
 void hotplug_handler_unplug_request(HotplugHandler *plug_handler,
                                     DeviceState *plugged_dev,
                                     Error **errp)
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index d835e62..5783442 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -945,21 +945,10 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
                 goto child_realize_fail;
             }
         }
-
         if (dev->hotplugged) {
             device_reset(dev);
         }
         dev->pending_deleted_event = false;
-        dev->realized = value;
-
-        if (hotplug_ctrl) {
-            hotplug_handler_post_plug(hotplug_ctrl, dev, &local_err);
-        }
-
-        if (local_err != NULL) {
-            dev->realized = value;
-            goto post_realize_fail;
-        }
     } else if (!value && dev->realized) {
         Error **local_errp = NULL;
         QLIST_FOREACH(bus, &dev->child_bus, sibling) {
@@ -976,14 +965,13 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
         }
         dev->pending_deleted_event = true;
         DEVICE_LISTENER_CALL(unrealize, Reverse, dev);
+    }
 
-        if (local_err != NULL) {
-            goto fail;
-        }
-
-        dev->realized = value;
+    if (local_err != NULL) {
+        goto fail;
     }
 
+    dev->realized = value;
     return;
 
 child_realize_fail:
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 2c37a78..cebaad2 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1715,22 +1715,16 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
         goto out;
     }
 
+    if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
+        nvdimm_acpi_hotplug(&pcms->acpi_nvdimm_state);
+    }
+
     hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
     hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort);
 out:
     error_propagate(errp, local_err);
 }
 
-static void pc_dimm_post_plug(HotplugHandler *hotplug_dev,
-                              DeviceState *dev, Error **errp)
-{
-    PCMachineState *pcms = PC_MACHINE(hotplug_dev);
-
-    if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
-        nvdimm_acpi_hotplug(&pcms->acpi_nvdimm_state);
-    }
-}
-
 static void pc_dimm_unplug_request(HotplugHandler *hotplug_dev,
                                    DeviceState *dev, Error **errp)
 {
@@ -2008,14 +2002,6 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
     }
 }
 
-static void pc_machine_device_post_plug_cb(HotplugHandler *hotplug_dev,
-                                           DeviceState *dev, Error **errp)
-{
-    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
-        pc_dimm_post_plug(hotplug_dev, dev, errp);
-    }
-}
-
 static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev,
                                                 DeviceState *dev, Error **errp)
 {
@@ -2322,7 +2308,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     mc->reset = pc_machine_reset;
     hc->pre_plug = pc_machine_device_pre_plug_cb;
     hc->plug = pc_machine_device_plug_cb;
-    hc->post_plug = pc_machine_device_post_plug_cb;
     hc->unplug_request = pc_machine_device_unplug_request_cb;
     hc->unplug = pc_machine_device_unplug_cb;
     nc->nmi_monitor_handler = x86_nmi;
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 21/47] nvdimm acpi: drop the lock of fit buffer
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (19 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 20/47] qdev: hotplug: drop HotplugHandler.post_plug callback Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 22/47] pc: memhp: move nvdimm hotplug out of memory hotplug Michael S. Tsirkin
                   ` (26 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

as there is a global lock to protect vm-exit handlers and
QMP/monitor, this lock can be dropped

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 include/hw/mem/nvdimm.h | 17 +++++------------
 hw/acpi/nvdimm.c        | 11 +----------
 2 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index 33cd421..d3ffb25 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -99,20 +99,13 @@ typedef struct NVDIMMClass NVDIMMClass;
 #define NVDIMM_ACPI_IO_LEN      4
 
 /*
- * The buffer, @fit, saves the FIT info for all the presented NVDIMM
- * devices which is updated after the NVDIMM device is plugged or
- * unplugged.
- *
- * Rules to use the buffer:
- *    1) the user should hold the @lock to access the buffer.
- *    2) mark @dirty whenever the buffer is updated.
- *
- * These rules preserve NVDIMM ACPI _FIT method to read incomplete
- * or obsolete fit info if fit update happens during multiple RFIT
- * calls.
+ * NvdimmFitBuffer:
+ * @fit: FIT structures for present NVDIMMs. It is updated when
+ *   the NVDIMM device is plugged or unplugged.
+ * @dirty: It allows OSPM to detect change and restart read in
+ *   progress if there is any.
  */
 struct NvdimmFitBuffer {
-    QemuMutex lock;
     GArray *fit;
     bool dirty;
 };
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 623bb36..0fe3547 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -371,17 +371,14 @@ static GArray *nvdimm_build_device_structure(void)
 
 static void nvdimm_init_fit_buffer(NvdimmFitBuffer *fit_buf)
 {
-    qemu_mutex_init(&fit_buf->lock);
     fit_buf->fit = g_array_new(false, true /* clear */, 1);
 }
 
 static void nvdimm_build_fit_buffer(NvdimmFitBuffer *fit_buf)
 {
-    qemu_mutex_lock(&fit_buf->lock);
     g_array_free(fit_buf->fit, true);
     fit_buf->fit = nvdimm_build_device_structure();
     fit_buf->dirty = true;
-    qemu_mutex_unlock(&fit_buf->lock);
 }
 
 void nvdimm_acpi_hotplug(AcpiNVDIMMState *state)
@@ -395,11 +392,10 @@ static void nvdimm_build_nfit(AcpiNVDIMMState *state, GArray *table_offsets,
     NvdimmFitBuffer *fit_buf = &state->fit_buf;
     unsigned int header;
 
-    qemu_mutex_lock(&fit_buf->lock);
 
     /* NVDIMM device is not plugged? */
     if (!fit_buf->fit->len) {
-        goto exit;
+        return;
     }
 
     acpi_add_table(table_offsets, table_data);
@@ -413,9 +409,6 @@ static void nvdimm_build_nfit(AcpiNVDIMMState *state, GArray *table_offsets,
     build_header(linker, table_data,
                  (void *)(table_data->data + header), "NFIT",
                  sizeof(NvdimmNfitHeader) + fit_buf->fit->len, 1, NULL, NULL);
-
-exit:
-    qemu_mutex_unlock(&fit_buf->lock);
 }
 
 struct NvdimmDsmIn {
@@ -544,7 +537,6 @@ static void nvdimm_dsm_func_read_fit(AcpiNVDIMMState *state, NvdimmDsmIn *in,
     read_fit = (NvdimmFuncReadFITIn *)in->arg3;
     le32_to_cpus(&read_fit->offset);
 
-    qemu_mutex_lock(&fit_buf->lock);
     fit = fit_buf->fit;
 
     nvdimm_debug("Read FIT: offset %#x FIT size %#x Dirty %s.\n",
@@ -578,7 +570,6 @@ exit:
     cpu_physical_memory_write(dsm_mem_addr, read_fit_out, size);
 
     g_free(read_fit_out);
-    qemu_mutex_unlock(&fit_buf->lock);
 }
 
 static void nvdimm_dsm_reserved_root(AcpiNVDIMMState *state, NvdimmDsmIn *in,
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 22/47] pc: memhp: move nvdimm hotplug out of memory hotplug
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (20 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 21/47] nvdimm acpi: drop the lock of fit buffer Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 23/47] pc: memhp: stop handling nvdimm hotplug in pc_dimm_unplug Michael S. Tsirkin
                   ` (25 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

as they use completely different way to handle hotplug event

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 include/hw/mem/nvdimm.h                 |  1 +
 hw/acpi/ich9.c                          |  8 ++++++--
 hw/acpi/memory_hotplug.c                | 31 ++++++++-----------------------
 hw/acpi/nvdimm.c                        |  7 +++++++
 hw/acpi/piix4.c                         |  7 ++++++-
 default-configs/mips-softmmu-common.mak |  1 +
 docs/specs/acpi_mem_hotplug.txt         |  3 ---
 docs/specs/acpi_nvdimm.txt              |  5 +++++
 8 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index d3ffb25..60585c3 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -131,4 +131,5 @@ void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data,
                        BIOSLinker *linker, AcpiNVDIMMState *state,
                        uint32_t ram_slots);
 void nvdimm_acpi_hotplug(AcpiNVDIMMState *state);
+void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev);
 #endif
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index e5a3c18..830c475 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -490,8 +490,12 @@ void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
 
     if (lpc->pm.acpi_memory_hotplug.is_enabled &&
         object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
-        acpi_memory_plug_cb(hotplug_dev, &lpc->pm.acpi_memory_hotplug,
-                            dev, errp);
+        if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
+            nvdimm_acpi_plug_cb(hotplug_dev, dev);
+        } else {
+            acpi_memory_plug_cb(hotplug_dev, &lpc->pm.acpi_memory_hotplug,
+                                dev, errp);
+        }
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
         if (lpc->pm.cpu_hotplug_legacy) {
             legacy_acpi_cpu_plug_cb(hotplug_dev, &lpc->pm.gpe_cpu, dev, errp);
diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c
index 70f6451..ec4e64b 100644
--- a/hw/acpi/memory_hotplug.c
+++ b/hw/acpi/memory_hotplug.c
@@ -2,7 +2,6 @@
 #include "hw/acpi/memory_hotplug.h"
 #include "hw/acpi/pc-hotplug.h"
 #include "hw/mem/pc-dimm.h"
-#include "hw/mem/nvdimm.h"
 #include "hw/boards.h"
 #include "hw/qdev-core.h"
 #include "trace.h"
@@ -233,8 +232,11 @@ void acpi_memory_plug_cb(HotplugHandler *hotplug_dev, MemHotplugState *mem_st,
                          DeviceState *dev, Error **errp)
 {
     MemStatus *mdev;
-    AcpiEventStatusBits event;
-    bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
+    DeviceClass *dc = DEVICE_GET_CLASS(dev);
+
+    if (!dc->hotpluggable) {
+        return;
+    }
 
     mdev = acpi_memory_slot_status(mem_st, dev, errp);
     if (!mdev) {
@@ -242,23 +244,10 @@ void acpi_memory_plug_cb(HotplugHandler *hotplug_dev, MemHotplugState *mem_st,
     }
 
     mdev->dimm = dev;
-
-    /*
-     * do not set is_enabled and is_inserting if the slot is plugged with
-     * a nvdimm device to stop OSPM inquires memory region from the slot.
-     */
-    if (is_nvdimm) {
-        event = ACPI_NVDIMM_HOTPLUG_STATUS;
-    } else {
-        mdev->is_enabled = true;
-        event = ACPI_MEMORY_HOTPLUG_STATUS;
-    }
-
+    mdev->is_enabled = true;
     if (dev->hotplugged) {
-        if (!is_nvdimm) {
-            mdev->is_inserting = true;
-        }
-        acpi_send_event(DEVICE(hotplug_dev), event);
+        mdev->is_inserting = true;
+        acpi_send_event(DEVICE(hotplug_dev), ACPI_MEMORY_HOTPLUG_STATUS);
     }
 }
 
@@ -273,8 +262,6 @@ void acpi_memory_unplug_request_cb(HotplugHandler *hotplug_dev,
         return;
     }
 
-    /* nvdimm device hot unplug is not supported yet. */
-    assert(!object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM));
     mdev->is_removing = true;
     acpi_send_event(DEVICE(hotplug_dev), ACPI_MEMORY_HOTPLUG_STATUS);
 }
@@ -289,8 +276,6 @@ void acpi_memory_unplug_cb(MemHotplugState *mem_st,
         return;
     }
 
-    /* nvdimm device hot unplug is not supported yet. */
-    assert(!object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM));
     mdev->is_enabled = false;
     mdev->dimm = NULL;
 }
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 0fe3547..5156565 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -868,6 +868,13 @@ static const MemoryRegionOps nvdimm_dsm_ops = {
     },
 };
 
+void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev)
+{
+    if (dev->hotplugged) {
+        acpi_send_event(DEVICE(hotplug_dev), ACPI_NVDIMM_HOTPLUG_STATUS);
+    }
+}
+
 void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io,
                             FWCfgState *fw_cfg, Object *owner)
 {
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 2adc246..17d36bd 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -378,7 +378,12 @@ static void piix4_device_plug_cb(HotplugHandler *hotplug_dev,
 
     if (s->acpi_memory_hotplug.is_enabled &&
         object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
-        acpi_memory_plug_cb(hotplug_dev, &s->acpi_memory_hotplug, dev, errp);
+        if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
+            nvdimm_acpi_plug_cb(hotplug_dev, dev);
+        } else {
+            acpi_memory_plug_cb(hotplug_dev, &s->acpi_memory_hotplug,
+                                dev, errp);
+        }
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
         acpi_pcihp_device_plug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, errp);
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
diff --git a/default-configs/mips-softmmu-common.mak b/default-configs/mips-softmmu-common.mak
index 0394514..f0676f5 100644
--- a/default-configs/mips-softmmu-common.mak
+++ b/default-configs/mips-softmmu-common.mak
@@ -17,6 +17,7 @@ CONFIG_FDC=y
 CONFIG_ACPI=y
 CONFIG_ACPI_X86=y
 CONFIG_ACPI_MEMORY_HOTPLUG=y
+CONFIG_ACPI_NVDIMM=y
 CONFIG_ACPI_CPU_HOTPLUG=y
 CONFIG_APM=y
 CONFIG_I8257=y
diff --git a/docs/specs/acpi_mem_hotplug.txt b/docs/specs/acpi_mem_hotplug.txt
index cb26dd2..3df3620 100644
--- a/docs/specs/acpi_mem_hotplug.txt
+++ b/docs/specs/acpi_mem_hotplug.txt
@@ -4,9 +4,6 @@ QEMU<->ACPI BIOS memory hotplug interface
 ACPI BIOS GPE.3 handler is dedicated for notifying OS about memory hot-add
 and hot-remove events.
 
-ACPI BIOS GPE.4 handler is dedicated for notifying OS about nvdimm device
-hot-add and hot-remove events.
-
 Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access):
 ---------------------------------------------------------------
 0xa00:
diff --git a/docs/specs/acpi_nvdimm.txt b/docs/specs/acpi_nvdimm.txt
index 4aa5e3d..d244147 100644
--- a/docs/specs/acpi_nvdimm.txt
+++ b/docs/specs/acpi_nvdimm.txt
@@ -127,6 +127,11 @@ _DSM process diagram:
  | result from the page     |      |              |
  +--------------------------+      +--------------+
 
+NVDIMM hotplug
+--------------
+ACPI BIOS GPE.4 handler is dedicated for notifying OS about nvdimm device
+hot-add event.
+
 Device Handle Reservation
 -------------------------
 As we mentioned above, byte 0 ~ byte 3 in the DSM memory save NVDIMM device
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 23/47] pc: memhp: stop handling nvdimm hotplug in pc_dimm_unplug
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (21 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 22/47] pc: memhp: move nvdimm hotplug out of memory hotplug Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 24/47] nvdimm acpi: clean up nvdimm_build_acpi Michael S. Tsirkin
                   ` (24 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

as it is never called when nvdimm hotplug happens

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/i386/pc.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index cebaad2..b69cd48 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1761,12 +1761,6 @@ static void pc_dimm_unplug(HotplugHandler *hotplug_dev,
     HotplugHandlerClass *hhc;
     Error *local_err = NULL;
 
-    if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
-        error_setg(&local_err,
-                   "nvdimm device hot unplug is not supported yet.");
-        goto out;
-    }
-
     hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
     hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
 
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 24/47] nvdimm acpi: clean up nvdimm_build_acpi
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (22 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 23/47] pc: memhp: stop handling nvdimm hotplug in pc_dimm_unplug Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 25/47] docs: improve the doc of Read FIT method Michael S. Tsirkin
                   ` (23 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

To make the code more clearer, we
1) check ram_slots first, and build ssdt & nfit only when it is available
2) use nvdimm_get_plugged_device_list() to check if there is nvdimm device
   plugged

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/acpi/nvdimm.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 5156565..65eb6c9 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -392,12 +392,6 @@ static void nvdimm_build_nfit(AcpiNVDIMMState *state, GArray *table_offsets,
     NvdimmFitBuffer *fit_buf = &state->fit_buf;
     unsigned int header;
 
-
-    /* NVDIMM device is not plugged? */
-    if (!fit_buf->fit->len) {
-        return;
-    }
-
     acpi_add_table(table_offsets, table_data);
 
     /* NFIT header. */
@@ -1275,14 +1269,22 @@ void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data,
                        BIOSLinker *linker, AcpiNVDIMMState *state,
                        uint32_t ram_slots)
 {
-    nvdimm_build_nfit(state, table_offsets, table_data, linker);
+    GSList *device_list;
 
-    /*
-     * NVDIMM device is allowed to be plugged only if there is available
-     * slot.
-     */
-    if (ram_slots) {
-        nvdimm_build_ssdt(table_offsets, table_data, linker, state->dsm_mem,
-                          ram_slots);
+    /* no nvdimm device can be plugged. */
+    if (!ram_slots) {
+        return;
     }
+
+    nvdimm_build_ssdt(table_offsets, table_data, linker, state->dsm_mem,
+                      ram_slots);
+
+    device_list = nvdimm_get_plugged_device_list();
+    /* no NVDIMM device is plugged. */
+    if (!device_list) {
+        return;
+    }
+
+    nvdimm_build_nfit(state, table_offsets, table_data, linker);
+    g_slist_free(device_list);
 }
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 25/47] docs: improve the doc of Read FIT method
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (23 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 24/47] nvdimm acpi: clean up nvdimm_build_acpi Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 26/47] nvdimm acpi: rename nvdimm_plugged_device_list Michael S. Tsirkin
                   ` (22 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

Improve the description and clearly document the length field

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 docs/specs/acpi_nvdimm.txt | 96 +++++++++++++++++++++++-----------------------
 1 file changed, 47 insertions(+), 49 deletions(-)

diff --git a/docs/specs/acpi_nvdimm.txt b/docs/specs/acpi_nvdimm.txt
index d244147..3f322e6 100644
--- a/docs/specs/acpi_nvdimm.txt
+++ b/docs/specs/acpi_nvdimm.txt
@@ -65,8 +65,8 @@ _FIT(Firmware Interface Table)
    The detailed definition of the structure can be found at ACPI 6.0: 5.2.25
    NVDIMM Firmware Interface Table (NFIT).
 
-QEMU NVDIMM Implemention
-========================
+QEMU NVDIMM Implementation
+==========================
 QEMU uses 4 bytes IO Port starting from 0x0a18 and a RAM-based memory page
 for NVDIMM ACPI.
 
@@ -80,8 +80,17 @@ Memory:
    emulates _DSM access and writes the output data to it.
 
    ACPI writes _DSM Input Data (based on the offset in the page):
-   [0x0 - 0x3]: 4 bytes, NVDIMM Device Handle, 0 is reserved for NVDIMM
-                Root device.
+   [0x0 - 0x3]: 4 bytes, NVDIMM Device Handle.
+
+                The handle is completely QEMU internal thing, the values in
+                range [1, 0xFFFF] indicate nvdimm device. Other values are
+                reserved for other purposes.
+
+                Reserved handles:
+                0 is reserved for nvdimm root device named NVDR.
+                0x10000 is reserved for QEMU internal DSM function called on
+                the root device.
+
    [0x4 - 0x7]: 4 bytes, Revision ID, that is the Arg1 of _DSM method.
    [0x8 - 0xB]: 4 bytes. Function Index, that is the Arg2 of _DSM method.
    [0xC - 0xFFF]: 4084 bytes, the Arg3 of _DSM method.
@@ -132,28 +141,12 @@ NVDIMM hotplug
 ACPI BIOS GPE.4 handler is dedicated for notifying OS about nvdimm device
 hot-add event.
 
-Device Handle Reservation
--------------------------
-As we mentioned above, byte 0 ~ byte 3 in the DSM memory save NVDIMM device
-handle. The handle is completely QEMU internal thing, the values in range
-[0, 0xFFFF] indicate nvdimm device (O means nvdimm root device named NVDR),
-other values are reserved by other purpose.
-
-Current reserved handle:
-0x10000 is reserved for QEMU internal DSM function called on the root
-device.
-
 QEMU internal use only _DSM function
 ------------------------------------
-UUID, 648B9CF2-CDA1-4312-8AD9-49C4AF32BD62, is reserved for QEMU internal
-DSM function.
-
-There is the function introduced by QEMU and only used by QEMU internal.
-
 1) Read FIT
-   As we only reserved one page for NVDIMM ACPI it is impossible to map the
-   whole FIT data to guest's address space. This function is used by _FIT
-   method to read a piece of FIT data from QEMU.
+   _FIT method uses _DSM method to fetch NFIT structures blob from QEMU
+   in 1 page sized increments which are then concatenated and returned
+   as _FIT method result.
 
    Input parameters:
    Arg0 – UUID {set to 648B9CF2-CDA1-4312-8AD9-49C4AF32BD62}
@@ -161,29 +154,34 @@ There is the function introduced by QEMU and only used by QEMU internal.
    Arg2 - Function Index, 0x1
    Arg3 - A package containing a buffer whose layout is as follows:
 
-   +----------+-------------+-------------+-----------------------------------+
-   |  Filed   | Byte Length | Byte Offset | Description                       |
-   +----------+-------------+-------------+-----------------------------------+
-   | offset   |     4       |    0        | the offset of FIT buffer          |
-   +----------+-------------+-------------+-----------------------------------+
-
-   Output:
-   +----------+-------------+-------------+-----------------------------------+
-   |  Filed   | Byte Length | Byte Offset | Description                       |
-   +----------+-------------+-------------+-----------------------------------+
-   |          |             |             | return status codes               |
-   |          |             |             |   0x100 indicates fit has been    |
-   | status   |     4       |    0        |   updated                         |
-   |          |             |             | other follows Chapter 3 in DSM    |
-   |          |             |             | Spec Rev1                         |
-   +----------+-------------+-------------+-----------------------------------+
-   | fit data |  Varies     |    4        | FIT data                          |
-   |          |             |             |                                   |
-   +----------+-------------+-------------+-----------------------------------+
-
-   The FIT offset is maintained by the caller itself, current offset plugs
-   the length returned by the function is the next offset we should read.
-   When all the FIT data has been read out, zero length is returned.
-
-   If it returns 0x100, OSPM should restart to read FIT (read from offset 0
-   again).
+   +----------+--------+--------+-------------------------------------------+
+   |  Field   | Length | Offset |                 Description               |
+   +----------+--------+--------+-------------------------------------------+
+   | offset   |   4    |   0    | offset in QEMU's NFIT structures blob to  |
+   |          |        |        | read from                                 |
+   +----------+--------+--------+-------------------------------------------+
+
+   Output layout in the dsm memory page:
+   +----------+--------+--------+-------------------------------------------+
+   |  Field   | Length | Offset |                 Description               |
+   +----------+--------+--------+-------------------------------------------+
+   | length   |   4    |   0    | length of entire returned data            |
+   |          |        |        | (including this header)                   |
+   +----------+-----------------+-------------------------------------------+
+   |          |        |        | return status codes                       |
+   |          |        |        | 0x0 - success                             |
+   |          |        |        | 0x100 - error caused by NFIT update while |
+   | status   |   4    |   4    | read by _FIT wasn't completed, other      |
+   |          |        |        | codes follow Chapter 3 in DSM Spec Rev1   |
+   +----------+-----------------+-------------------------------------------+
+   | fit data | Varies |   8    | contains FIT data, this field is present  |
+   |          |        |        | if status field is 0;                     |
+   +----------+--------+--------+-------------------------------------------+
+
+   The FIT offset is maintained by the OSPM itself, current offset plus
+   the size of the fit data returned by the function is the next offset
+   OSPM should read. When all FIT data has been read out, zero fit data
+   size is returned.
+
+   If it returns status code 0x100, OSPM should restart to read FIT (read
+   from offset 0 again).
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 26/47] nvdimm acpi: rename nvdimm_plugged_device_list
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (24 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 25/47] docs: improve the doc of Read FIT method Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 27/47] nvdimm acpi: cleanup nvdimm_build_fit Michael S. Tsirkin
                   ` (21 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

Its behavior has been changed as the nvdimm device which is being
realized also will be handled in this function, so rename it to
reflect the fact

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/acpi/nvdimm.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 65eb6c9..f2c0659 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -33,7 +33,7 @@
 #include "hw/nvram/fw_cfg.h"
 #include "hw/mem/nvdimm.h"
 
-static int nvdimm_plugged_device_list(Object *obj, void *opaque)
+static int nvdimm_device_list(Object *obj, void *opaque)
 {
     GSList **list = opaque;
 
@@ -41,23 +41,22 @@ static int nvdimm_plugged_device_list(Object *obj, void *opaque)
         *list = g_slist_append(*list, DEVICE(obj));
     }
 
-    object_child_foreach(obj, nvdimm_plugged_device_list, opaque);
+    object_child_foreach(obj, nvdimm_device_list, opaque);
     return 0;
 }
 
 /*
- * inquire plugged NVDIMM devices and link them into the list which is
+ * inquire NVDIMM devices and link them into the list which is
  * returned to the caller.
  *
  * Note: it is the caller's responsibility to free the list to avoid
  * memory leak.
  */
-static GSList *nvdimm_get_plugged_device_list(void)
+static GSList *nvdimm_get_device_list(void)
 {
     GSList *list = NULL;
 
-    object_child_foreach(qdev_get_machine(), nvdimm_plugged_device_list,
-                         &list);
+    object_child_foreach(qdev_get_machine(), nvdimm_device_list, &list);
     return list;
 }
 
@@ -215,7 +214,7 @@ static uint32_t nvdimm_slot_to_dcr_index(int slot)
 static NVDIMMDevice *nvdimm_get_device_by_handle(uint32_t handle)
 {
     NVDIMMDevice *nvdimm = NULL;
-    GSList *list, *device_list = nvdimm_get_plugged_device_list();
+    GSList *list, *device_list = nvdimm_get_device_list();
 
     for (list = device_list; list; list = list->next) {
         NVDIMMDevice *nvd = list->data;
@@ -346,7 +345,7 @@ static void nvdimm_build_structure_dcr(GArray *structures, DeviceState *dev)
 
 static GArray *nvdimm_build_device_structure(void)
 {
-    GSList *device_list = nvdimm_get_plugged_device_list();
+    GSList *device_list = nvdimm_get_device_list();
     GArray *structures = g_array_new(false, true /* clear */, 1);
 
     for (; device_list; device_list = device_list->next) {
@@ -1279,7 +1278,7 @@ void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data,
     nvdimm_build_ssdt(table_offsets, table_data, linker, state->dsm_mem,
                       ram_slots);
 
-    device_list = nvdimm_get_plugged_device_list();
+    device_list = nvdimm_get_device_list();
     /* no NVDIMM device is plugged. */
     if (!device_list) {
         return;
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 27/47] nvdimm acpi: cleanup nvdimm_build_fit
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (25 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 26/47] nvdimm acpi: rename nvdimm_plugged_device_list Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 28/47] nvdimm acpi: rename nvdimm_acpi_hotplug Michael S. Tsirkin
                   ` (20 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

inline buf_size to refine the code a bit

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/acpi/nvdimm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index f2c0659..148999e 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -1134,11 +1134,9 @@ static void nvdimm_build_fit(Aml *dev)
     aml_append(ifctx, aml_return(aml_buffer(0, NULL)));
     aml_append(method, ifctx);
 
-    aml_append(method, aml_store(aml_shiftleft(buf_size, aml_int(3)),
-                                 buf_size));
     aml_append(method, aml_create_field(buf,
                             aml_int(4 * BITS_PER_BYTE), /* offset at byte 4.*/
-                            buf_size, "BUFF"));
+                            aml_shiftleft(buf_size, aml_int(3)), "BUFF"));
     aml_append(method, aml_return(aml_name("BUFF")));
     aml_append(dev, method);
 
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 28/47] nvdimm acpi: rename nvdimm_acpi_hotplug
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (26 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 27/47] nvdimm acpi: cleanup nvdimm_build_fit Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 29/47] nvdimm acpi: define DSM return codes Michael S. Tsirkin
                   ` (19 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

Rename it to nvdimm_plug()

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 include/hw/mem/nvdimm.h | 2 +-
 hw/acpi/nvdimm.c        | 2 +-
 hw/i386/pc.c            | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index 60585c3..03e1ff9 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -130,6 +130,6 @@ void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io,
 void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data,
                        BIOSLinker *linker, AcpiNVDIMMState *state,
                        uint32_t ram_slots);
-void nvdimm_acpi_hotplug(AcpiNVDIMMState *state);
+void nvdimm_plug(AcpiNVDIMMState *state);
 void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev);
 #endif
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 148999e..7733f14 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -380,7 +380,7 @@ static void nvdimm_build_fit_buffer(NvdimmFitBuffer *fit_buf)
     fit_buf->dirty = true;
 }
 
-void nvdimm_acpi_hotplug(AcpiNVDIMMState *state)
+void nvdimm_plug(AcpiNVDIMMState *state)
 {
     nvdimm_build_fit_buffer(&state->fit_buf);
 }
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index b69cd48..a9b1950 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1716,7 +1716,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
     }
 
     if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
-        nvdimm_acpi_hotplug(&pcms->acpi_nvdimm_state);
+        nvdimm_plug(&pcms->acpi_nvdimm_state);
     }
 
     hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 29/47] nvdimm acpi: define DSM return codes
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (27 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 28/47] nvdimm acpi: rename nvdimm_acpi_hotplug Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 30/47] nvdimm acpi: fix two comments Michael S. Tsirkin
                   ` (18 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

and use these codes to refine the code

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/acpi/nvdimm.c | 46 +++++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 7733f14..c7e7744 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -514,7 +514,13 @@ nvdimm_dsm_no_payload(uint32_t func_ret_status, hwaddr dsm_mem_addr)
     cpu_physical_memory_write(dsm_mem_addr, &out, sizeof(out));
 }
 
-#define NVDIMM_QEMU_RSVD_HANDLE_ROOT 0x10000
+#define NVDIMM_DSM_RET_STATUS_SUCCESS        0 /* Success */
+#define NVDIMM_DSM_RET_STATUS_UNSUPPORT      1 /* Not Supported */
+#define NVDIMM_DSM_RET_STATUS_NOMEMDEV       2 /* Non-Existing Memory Device */
+#define NVDIMM_DSM_RET_STATUS_INVALID        3 /* Invalid Input Parameters */
+#define NVDIMM_DSM_RET_STATUS_FIT_CHANGED    0x100 /* FIT Changed */
+
+#define NVDIMM_QEMU_RSVD_HANDLE_ROOT         0x10000
 
 /* Read FIT data, defined in docs/specs/acpi_nvdimm.txt. */
 static void nvdimm_dsm_func_read_fit(AcpiNVDIMMState *state, NvdimmDsmIn *in,
@@ -536,7 +542,7 @@ static void nvdimm_dsm_func_read_fit(AcpiNVDIMMState *state, NvdimmDsmIn *in,
                  read_fit->offset, fit->len, fit_buf->dirty ? "Yes" : "No");
 
     if (read_fit->offset > fit->len) {
-        func_ret_status = 3 /* Invalid Input Parameters */;
+        func_ret_status = NVDIMM_DSM_RET_STATUS_INVALID;
         goto exit;
     }
 
@@ -544,11 +550,11 @@ static void nvdimm_dsm_func_read_fit(AcpiNVDIMMState *state, NvdimmDsmIn *in,
     if (!read_fit->offset) {
         fit_buf->dirty = false;
     } else if (fit_buf->dirty) { /* FIT has been changed during RFIT. */
-        func_ret_status = 0x100 /* fit changed */;
+        func_ret_status = NVDIMM_DSM_RET_STATUS_FIT_CHANGED;
         goto exit;
     }
 
-    func_ret_status = 0 /* Success */;
+    func_ret_status = NVDIMM_DSM_RET_STATUS_SUCCESS;
     read_len = MIN(fit->len - read_fit->offset,
                    4096 - sizeof(NvdimmFuncReadFITOut));
 
@@ -577,7 +583,7 @@ static void nvdimm_dsm_reserved_root(AcpiNVDIMMState *state, NvdimmDsmIn *in,
         return;
     }
 
-    nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr);
+    nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_UNSUPPORT, dsm_mem_addr);
 }
 
 static void nvdimm_dsm_root(NvdimmDsmIn *in, hwaddr dsm_mem_addr)
@@ -593,7 +599,7 @@ static void nvdimm_dsm_root(NvdimmDsmIn *in, hwaddr dsm_mem_addr)
     }
 
     /* No function except function 0 is supported yet. */
-    nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr);
+    nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_UNSUPPORT, dsm_mem_addr);
 }
 
 /*
@@ -639,7 +645,7 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr)
 
     nvdimm_debug("label_size %#x, max_xfer %#x.\n", label_size, mxfer);
 
-    label_size_out.func_ret_status = cpu_to_le32(0 /* Success */);
+    label_size_out.func_ret_status = cpu_to_le32(NVDIMM_DSM_RET_STATUS_SUCCESS);
     label_size_out.label_size = cpu_to_le32(label_size);
     label_size_out.max_xfer = cpu_to_le32(mxfer);
 
@@ -650,7 +656,7 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr)
 static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
                                            uint32_t offset, uint32_t length)
 {
-    uint32_t ret = 3 /* Invalid Input Parameters */;
+    uint32_t ret = NVDIMM_DSM_RET_STATUS_INVALID;
 
     if (offset + length < offset) {
         nvdimm_debug("offset %#x + length %#x is overflow.\n", offset,
@@ -670,7 +676,7 @@ static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
         return ret;
     }
 
-    return 0 /* Success */;
+    return NVDIMM_DSM_RET_STATUS_SUCCESS;
 }
 
 /*
@@ -694,7 +700,7 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
 
     status = nvdimm_rw_label_data_check(nvdimm, get_label_data->offset,
                                         get_label_data->length);
-    if (status != 0 /* Success */) {
+    if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) {
         nvdimm_dsm_no_payload(status, dsm_mem_addr);
         return;
     }
@@ -704,7 +710,8 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
     get_label_data_out = g_malloc(size);
 
     get_label_data_out->len = cpu_to_le32(size);
-    get_label_data_out->func_ret_status = cpu_to_le32(0 /* Success */);
+    get_label_data_out->func_ret_status =
+                            cpu_to_le32(NVDIMM_DSM_RET_STATUS_SUCCESS);
     nvc->read_label_data(nvdimm, get_label_data_out->out_buf,
                          get_label_data->length, get_label_data->offset);
 
@@ -732,7 +739,7 @@ static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
 
     status = nvdimm_rw_label_data_check(nvdimm, set_label_data->offset,
                                         set_label_data->length);
-    if (status != 0 /* Success */) {
+    if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) {
         nvdimm_dsm_no_payload(status, dsm_mem_addr);
         return;
     }
@@ -742,7 +749,7 @@ static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
 
     nvc->write_label_data(nvdimm, set_label_data->in_buf,
                           set_label_data->length, set_label_data->offset);
-    nvdimm_dsm_no_payload(0 /* Success */, dsm_mem_addr);
+    nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_SUCCESS, dsm_mem_addr);
 }
 
 static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr)
@@ -766,7 +773,7 @@ static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr)
     }
 
     if (!nvdimm) {
-        nvdimm_dsm_no_payload(2 /* Non-Existing Memory Device */,
+        nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_NOMEMDEV,
                               dsm_mem_addr);
         return;
     }
@@ -793,7 +800,7 @@ static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr)
         break;
     }
 
-    nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr);
+    nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_UNSUPPORT, dsm_mem_addr);
 }
 
 static uint64_t
@@ -830,7 +837,7 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
     if (in->revision != 0x1 /* Currently we only support DSM Spec Rev1. */) {
         nvdimm_debug("Revision %#x is not supported, expect %#x.\n",
                      in->revision, 0x1);
-        nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr);
+        nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_UNSUPPORT, dsm_mem_addr);
         goto exit;
     }
 
@@ -1018,7 +1025,7 @@ static void nvdimm_build_common_dsm(Aml *dev)
     aml_append(unsupport, ifctx);
 
     /* No function is supported yet. */
-    byte_list[0] = 1 /* Not Supported */;
+    byte_list[0] = NVDIMM_DSM_RET_STATUS_UNSUPPORT;
     aml_append(unsupport, aml_return(aml_buffer(1, byte_list)));
     aml_append(method, unsupport);
 
@@ -1119,7 +1126,8 @@ static void nvdimm_build_fit(Aml *dev)
                                  aml_name(NVDIMM_DSM_RFIT_STATUS)));
 
      /* if something is wrong during _DSM. */
-    ifcond = aml_equal(aml_int(0 /* Success */), aml_name("STAU"));
+    ifcond = aml_equal(aml_int(NVDIMM_DSM_RET_STATUS_SUCCESS),
+                       aml_name("STAU"));
     ifctx = aml_if(aml_lnot(ifcond));
     aml_append(ifctx, aml_return(aml_buffer(0, NULL)));
     aml_append(method, ifctx);
@@ -1156,7 +1164,7 @@ static void nvdimm_build_fit(Aml *dev)
      * again.
      */
     ifctx = aml_if(aml_equal(aml_name(NVDIMM_DSM_RFIT_STATUS),
-                             aml_int(0x100 /* fit changed */)));
+                             aml_int(NVDIMM_DSM_RET_STATUS_FIT_CHANGED)));
     aml_append(ifctx, aml_store(aml_buffer(0, NULL), fit));
     aml_append(ifctx, aml_store(aml_int(0), offset));
     aml_append(whilectx, ifctx);
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 30/47] nvdimm acpi: fix two comments
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (28 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 29/47] nvdimm acpi: define DSM return codes Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 31/47] nvdimm acpi: rename nvdimm_dsm_reserved_root Michael S. Tsirkin
                   ` (17 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Xiao Guangrong, Stefan Hajnoczi, Igor Mammedov

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

fixed the English issue and code-style issue

Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/acpi/nvdimm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index c7e7744..6f6f51f 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -479,7 +479,7 @@ QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncSetLabelDataIn) +
                   offsetof(NvdimmDsmIn, arg3) > 4096);
 
 struct NvdimmFuncReadFITIn {
-    uint32_t offset; /* the offset of FIT buffer. */
+    uint32_t offset; /* the offset into FIT buffer. */
 } QEMU_PACKED;
 typedef struct NvdimmFuncReadFITIn NvdimmFuncReadFITIn;
 QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncReadFITIn) +
@@ -578,7 +578,7 @@ static void nvdimm_dsm_reserved_root(AcpiNVDIMMState *state, NvdimmDsmIn *in,
     case 0x0:
         nvdimm_dsm_function0(0x1 | 1 << 1 /* Read FIT */, dsm_mem_addr);
         return;
-    case 0x1 /*Read FIT */:
+    case 0x1 /* Read FIT */:
         nvdimm_dsm_func_read_fit(state, in, dsm_mem_addr);
         return;
     }
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 31/47] nvdimm acpi: rename nvdimm_dsm_reserved_root
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (29 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 30/47] nvdimm acpi: fix two comments Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 32/47] nvdimm acpi: use aml_name_decl to define named object Michael S. Tsirkin
                   ` (16 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

Rename it to nvdimm_dsm_handle_reserved_root_method

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/acpi/nvdimm.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 6f6f51f..6692648 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -571,8 +571,9 @@ exit:
     g_free(read_fit_out);
 }
 
-static void nvdimm_dsm_reserved_root(AcpiNVDIMMState *state, NvdimmDsmIn *in,
-                                     hwaddr dsm_mem_addr)
+static void
+nvdimm_dsm_handle_reserved_root_method(AcpiNVDIMMState *state,
+                                       NvdimmDsmIn *in, hwaddr dsm_mem_addr)
 {
     switch (in->function) {
     case 0x0:
@@ -842,7 +843,7 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
     }
 
     if (in->handle == NVDIMM_QEMU_RSVD_HANDLE_ROOT) {
-        nvdimm_dsm_reserved_root(state, in, dsm_mem_addr);
+        nvdimm_dsm_handle_reserved_root_method(state, in, dsm_mem_addr);
         goto exit;
     }
 
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 32/47] nvdimm acpi: use aml_name_decl to define named object
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (30 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 31/47] nvdimm acpi: rename nvdimm_dsm_reserved_root Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 33/47] nvdimm acpi: introduce NVDIMM_DSM_MEMORY_SIZE Michael S. Tsirkin
                   ` (15 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

to make the code more clearer

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/acpi/nvdimm.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 6692648..5f48b75 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -1098,13 +1098,11 @@ static void nvdimm_build_fit(Aml *dev)
     buf_size = aml_local(1);
     fit = aml_local(2);
 
-    aml_append(dev, aml_create_dword_field(aml_buffer(4, NULL),
-               aml_int(0), NVDIMM_DSM_RFIT_STATUS));
+    aml_append(dev, aml_name_decl(NVDIMM_DSM_RFIT_STATUS, aml_int(0)));
 
     /* build helper function, RFIT. */
     method = aml_method("RFIT", 1, AML_SERIALIZED);
-    aml_append(method, aml_create_dword_field(aml_buffer(4, NULL),
-                                              aml_int(0), "OFST"));
+    aml_append(method, aml_name_decl("OFST", aml_int(0)));
 
     /* prepare input package. */
     pkg = aml_package(1);
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 33/47] nvdimm acpi: introduce NVDIMM_DSM_MEMORY_SIZE
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (31 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 32/47] nvdimm acpi: use aml_name_decl to define named object Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:13 ` [Qemu-devel] [PULL 34/47] msix: Follow CODING_STYLE Michael S. Tsirkin
                   ` (14 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Xiao Guangrong, Igor Mammedov, Stefan Hajnoczi

From: Xiao Guangrong <guangrong.xiao@linux.intel.com>

and use it to replace the raw number

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/acpi/nvdimm.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 5f48b75..8e7d6ec 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -404,6 +404,8 @@ static void nvdimm_build_nfit(AcpiNVDIMMState *state, GArray *table_offsets,
                  sizeof(NvdimmNfitHeader) + fit_buf->fit->len, 1, NULL, NULL);
 }
 
+#define NVDIMM_DSM_MEMORY_SIZE      4096
+
 struct NvdimmDsmIn {
     uint32_t handle;
     uint32_t revision;
@@ -414,7 +416,7 @@ struct NvdimmDsmIn {
     };
 } QEMU_PACKED;
 typedef struct NvdimmDsmIn NvdimmDsmIn;
-QEMU_BUILD_BUG_ON(sizeof(NvdimmDsmIn) != 4096);
+QEMU_BUILD_BUG_ON(sizeof(NvdimmDsmIn) != NVDIMM_DSM_MEMORY_SIZE);
 
 struct NvdimmDsmOut {
     /* the size of buffer filled by QEMU. */
@@ -422,7 +424,7 @@ struct NvdimmDsmOut {
     uint8_t data[4092];
 } QEMU_PACKED;
 typedef struct NvdimmDsmOut NvdimmDsmOut;
-QEMU_BUILD_BUG_ON(sizeof(NvdimmDsmOut) != 4096);
+QEMU_BUILD_BUG_ON(sizeof(NvdimmDsmOut) != NVDIMM_DSM_MEMORY_SIZE);
 
 struct NvdimmDsmFunc0Out {
     /* the size of buffer filled by QEMU. */
@@ -450,7 +452,7 @@ struct NvdimmFuncGetLabelSizeOut {
     uint32_t max_xfer;
 } QEMU_PACKED;
 typedef struct NvdimmFuncGetLabelSizeOut NvdimmFuncGetLabelSizeOut;
-QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelSizeOut) > 4096);
+QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelSizeOut) > NVDIMM_DSM_MEMORY_SIZE);
 
 struct NvdimmFuncGetLabelDataIn {
     uint32_t offset; /* the offset in the namespace label data area. */
@@ -458,7 +460,7 @@ struct NvdimmFuncGetLabelDataIn {
 } QEMU_PACKED;
 typedef struct NvdimmFuncGetLabelDataIn NvdimmFuncGetLabelDataIn;
 QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelDataIn) +
-                  offsetof(NvdimmDsmIn, arg3) > 4096);
+                  offsetof(NvdimmDsmIn, arg3) > NVDIMM_DSM_MEMORY_SIZE);
 
 struct NvdimmFuncGetLabelDataOut {
     /* the size of buffer filled by QEMU. */
@@ -467,7 +469,7 @@ struct NvdimmFuncGetLabelDataOut {
     uint8_t out_buf[0]; /* the data got via Get Namesapce Label function. */
 } QEMU_PACKED;
 typedef struct NvdimmFuncGetLabelDataOut NvdimmFuncGetLabelDataOut;
-QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelDataOut) > 4096);
+QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelDataOut) > NVDIMM_DSM_MEMORY_SIZE);
 
 struct NvdimmFuncSetLabelDataIn {
     uint32_t offset; /* the offset in the namespace label data area. */
@@ -476,14 +478,14 @@ struct NvdimmFuncSetLabelDataIn {
 } QEMU_PACKED;
 typedef struct NvdimmFuncSetLabelDataIn NvdimmFuncSetLabelDataIn;
 QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncSetLabelDataIn) +
-                  offsetof(NvdimmDsmIn, arg3) > 4096);
+                  offsetof(NvdimmDsmIn, arg3) > NVDIMM_DSM_MEMORY_SIZE);
 
 struct NvdimmFuncReadFITIn {
     uint32_t offset; /* the offset into FIT buffer. */
 } QEMU_PACKED;
 typedef struct NvdimmFuncReadFITIn NvdimmFuncReadFITIn;
 QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncReadFITIn) +
-                  offsetof(NvdimmDsmIn, arg3) > 4096);
+                  offsetof(NvdimmDsmIn, arg3) > NVDIMM_DSM_MEMORY_SIZE);
 
 struct NvdimmFuncReadFITOut {
     /* the size of buffer filled by QEMU. */
@@ -492,7 +494,7 @@ struct NvdimmFuncReadFITOut {
     uint8_t fit[0]; /* the FIT data. */
 } QEMU_PACKED;
 typedef struct NvdimmFuncReadFITOut NvdimmFuncReadFITOut;
-QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncReadFITOut) > 4096);
+QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncReadFITOut) > NVDIMM_DSM_MEMORY_SIZE);
 
 static void
 nvdimm_dsm_function0(uint32_t supported_func, hwaddr dsm_mem_addr)
@@ -556,7 +558,7 @@ static void nvdimm_dsm_func_read_fit(AcpiNVDIMMState *state, NvdimmDsmIn *in,
 
     func_ret_status = NVDIMM_DSM_RET_STATUS_SUCCESS;
     read_len = MIN(fit->len - read_fit->offset,
-                   4096 - sizeof(NvdimmFuncReadFITOut));
+                   NVDIMM_DSM_MEMORY_SIZE - sizeof(NvdimmFuncReadFITOut));
 
 exit:
     size = sizeof(NvdimmFuncReadFITOut) + read_len;
@@ -610,7 +612,9 @@ static void nvdimm_dsm_root(NvdimmDsmIn *in, hwaddr dsm_mem_addr)
  */
 static uint32_t nvdimm_get_max_xfer_label_size(void)
 {
-    uint32_t max_get_size, max_set_size, dsm_memory_size = 4096;
+    uint32_t max_get_size, max_set_size, dsm_memory_size;
+
+    dsm_memory_size = NVDIMM_DSM_MEMORY_SIZE;
 
     /*
      * the max data ACPI can read one time which is transferred by
@@ -707,7 +711,7 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
     }
 
     size = sizeof(*get_label_data_out) + get_label_data->length;
-    assert(size <= 4096);
+    assert(size <= NVDIMM_DSM_MEMORY_SIZE);
     get_label_data_out = g_malloc(size);
 
     get_label_data_out->len = cpu_to_le32(size);
@@ -745,8 +749,8 @@ static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
         return;
     }
 
-    assert(offsetof(NvdimmDsmIn, arg3) +
-           sizeof(*set_label_data) + set_label_data->length <= 4096);
+    assert(offsetof(NvdimmDsmIn, arg3) + sizeof(*set_label_data) +
+                    set_label_data->length <= NVDIMM_DSM_MEMORY_SIZE);
 
     nvc->write_label_data(nvdimm, set_label_data->in_buf,
                           set_label_data->length, set_label_data->offset);
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 34/47] msix: Follow CODING_STYLE
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (32 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 33/47] nvdimm acpi: introduce NVDIMM_DSM_MEMORY_SIZE Michael S. Tsirkin
@ 2016-11-10 16:13 ` Michael S. Tsirkin
  2016-11-10 16:14 ` [Qemu-devel] [PULL 35/47] hcd-xhci: check & correct param before using it Michael S. Tsirkin
                   ` (13 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Cao jin, Markus Armbruster, Marcel Apfelbaum

From: Cao jin <caoj.fnst@cn.fujitsu.com>

CC: Markus Armbruster <armbru@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci/msix.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 0ec1cb1..0cee631 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -447,8 +447,10 @@ void msix_notify(PCIDevice *dev, unsigned vector)
 {
     MSIMessage msg;
 
-    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
         return;
+    }
+
     if (msix_is_masked(dev, vector)) {
         msix_set_pending(dev, vector);
         return;
@@ -483,8 +485,10 @@ void msix_reset(PCIDevice *dev)
 /* Mark vector as used. */
 int msix_vector_use(PCIDevice *dev, unsigned vector)
 {
-    if (vector >= dev->msix_entries_nr)
+    if (vector >= dev->msix_entries_nr) {
         return -EINVAL;
+    }
+
     dev->msix_entry_used[vector]++;
     return 0;
 }
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 35/47] hcd-xhci: check & correct param before using it
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (33 preceding siblings ...)
  2016-11-10 16:13 ` [Qemu-devel] [PULL 34/47] msix: Follow CODING_STYLE Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-10 16:14 ` [Qemu-devel] [PULL 36/47] pci: Convert msix_init() to Error and fix callers to check it Michael S. Tsirkin
                   ` (12 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Cao jin, Gerd Hoffmann, Markus Armbruster,
	Marcel Apfelbaum

From: Cao jin <caoj.fnst@cn.fujitsu.com>

usb_xhci_realize() corrects invalid values of property "intrs"
automatically, but the uncorrected value is passed to msi_init(),
which chokes on invalid values.  Delay that until after the
correction.

Resources allocated by usb_xhci_init() are leaked when msi_init()
fails.  Fix by calling it after msi_init().

CC: Gerd Hoffmann <kraxel@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>

Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/usb/hcd-xhci.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 4acf0c6..eb1dca5 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -3627,25 +3627,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp)
     dev->config[PCI_CACHE_LINE_SIZE] = 0x10;
     dev->config[0x60] = 0x30; /* release number */
 
-    usb_xhci_init(xhci);
-
-    if (xhci->msi != ON_OFF_AUTO_OFF) {
-        ret = msi_init(dev, 0x70, xhci->numintrs, true, false, &err);
-        /* Any error other than -ENOTSUP(board's MSI support is broken)
-         * is a programming error */
-        assert(!ret || ret == -ENOTSUP);
-        if (ret && xhci->msi == ON_OFF_AUTO_ON) {
-            /* Can't satisfy user's explicit msi=on request, fail */
-            error_append_hint(&err, "You have to use msi=auto (default) or "
-                    "msi=off with this machine type.\n");
-            error_propagate(errp, err);
-            return;
-        }
-        assert(!err || xhci->msi == ON_OFF_AUTO_AUTO);
-        /* With msi=auto, we fall back to MSI off silently */
-        error_free(err);
-    }
-
     if (xhci->numintrs > MAXINTRS) {
         xhci->numintrs = MAXINTRS;
     }
@@ -3667,7 +3648,22 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp)
         xhci->max_pstreams_mask = 0;
     }
 
-    xhci->mfwrap_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, xhci_mfwrap_timer, xhci);
+    if (xhci->msi != ON_OFF_AUTO_OFF) {
+        ret = msi_init(dev, 0x70, xhci->numintrs, true, false, &err);
+        /* Any error other than -ENOTSUP(board's MSI support is broken)
+         * is a programming error */
+        assert(!ret || ret == -ENOTSUP);
+        if (ret && xhci->msi == ON_OFF_AUTO_ON) {
+            /* Can't satisfy user's explicit msi=on request, fail */
+            error_append_hint(&err, "You have to use msi=auto (default) or "
+                    "msi=off with this machine type.\n");
+            error_propagate(errp, err);
+            return;
+        }
+        assert(!err || xhci->msi == ON_OFF_AUTO_AUTO);
+        /* With msi=auto, we fall back to MSI off silently */
+        error_free(err);
+    }
 
     memory_region_init(&xhci->mem, OBJECT(xhci), "xhci", LEN_REGS);
     memory_region_init_io(&xhci->mem_cap, OBJECT(xhci), &xhci_cap_ops, xhci,
@@ -3697,6 +3693,9 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp)
                      PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64,
                      &xhci->mem);
 
+    usb_xhci_init(xhci);
+    xhci->mfwrap_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, xhci_mfwrap_timer, xhci);
+
     if (pci_bus_is_express(dev->bus) ||
         xhci_get_flag(xhci, XHCI_FLAG_FORCE_PCIE_ENDCAP)) {
         ret = pcie_endpoint_cap_init(dev, 0xa0);
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 36/47] pci: Convert msix_init() to Error and fix callers to check it
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (34 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 35/47] hcd-xhci: check & correct param before using it Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-11  6:55   ` Hannes Reinecke
  2016-11-10 16:14 ` [Qemu-devel] [PULL 37/47] megasas: change behaviour of msix switch Michael S. Tsirkin
                   ` (11 subsequent siblings)
  47 siblings, 1 reply; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Cao jin, Jiri Pirko, Gerd Hoffmann,
	Dmitry Fleytman, Jason Wang, Hannes Reinecke, Paolo Bonzini,
	Alex Williamson, Markus Armbruster, Marcel Apfelbaum,
	Keith Busch, Kevin Wolf, Max Reitz, qemu-block

From: Cao jin <caoj.fnst@cn.fujitsu.com>

msix_init() reports errors with error_report(), which is wrong when
it's used in realize().  The same issue was fixed for msi_init() in
commit 1108b2f.

For some devices(like e1000e, vmxnet3) who won't fail because of
msix_init's failure, suppress the error report by passing NULL error object.

Bonus: add comment for msix_init.

CC: Jiri Pirko <jiri@resnulli.us>
CC: Gerd Hoffmann <kraxel@redhat.com>
CC: Dmitry Fleytman <dmitry@daynix.com>
CC: Jason Wang <jasowang@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Hannes Reinecke <hare@suse.de>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Alex Williamson <alex.williamson@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/pci/msix.h  |  5 +++--
 hw/block/nvme.c        |  5 ++++-
 hw/misc/ivshmem.c      |  8 ++++----
 hw/net/e1000e.c        |  6 +++++-
 hw/net/rocker/rocker.c |  7 ++++++-
 hw/net/vmxnet3.c       |  8 ++++++--
 hw/pci/msix.c          | 34 +++++++++++++++++++++++++++++-----
 hw/scsi/megasas.c      |  5 ++++-
 hw/usb/hcd-xhci.c      | 13 ++++++++-----
 hw/vfio/pci.c          |  8 ++++++--
 hw/virtio/virtio-pci.c | 11 +++++------
 11 files changed, 80 insertions(+), 30 deletions(-)

diff --git a/include/hw/pci/msix.h b/include/hw/pci/msix.h
index 048a29d..1f27658 100644
--- a/include/hw/pci/msix.h
+++ b/include/hw/pci/msix.h
@@ -9,9 +9,10 @@ MSIMessage msix_get_message(PCIDevice *dev, unsigned int vector);
 int msix_init(PCIDevice *dev, unsigned short nentries,
               MemoryRegion *table_bar, uint8_t table_bar_nr,
               unsigned table_offset, MemoryRegion *pba_bar,
-              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos);
+              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos,
+              Error **errp);
 int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
-                            uint8_t bar_nr);
+                            uint8_t bar_nr, Error **errp);
 
 void msix_write_config(PCIDevice *dev, uint32_t address, uint32_t val, int len);
 
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index d479fd2..2d703c8 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -831,6 +831,7 @@ static int nvme_init(PCIDevice *pci_dev)
 {
     NvmeCtrl *n = NVME(pci_dev);
     NvmeIdCtrl *id = &n->id_ctrl;
+    Error *err = NULL;
 
     int i;
     int64_t bs_size;
@@ -872,7 +873,9 @@ static int nvme_init(PCIDevice *pci_dev)
     pci_register_bar(&n->parent_obj, 0,
         PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
         &n->iomem);
-    msix_init_exclusive_bar(&n->parent_obj, n->num_queues, 4);
+    if (msix_init_exclusive_bar(&n->parent_obj, n->num_queues, 4, &err)) {
+        error_report_err(err);
+    }
 
     id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
     id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 230e51b..ca6f804 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -749,13 +749,13 @@ static void ivshmem_reset(DeviceState *d)
     }
 }
 
-static int ivshmem_setup_interrupts(IVShmemState *s)
+static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
 {
     /* allocate QEMU callback data for receiving interrupts */
     s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));
 
     if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
-        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1)) {
+        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) {
             return -1;
         }
 
@@ -897,8 +897,8 @@ static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
         qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive,
                                  ivshmem_read, NULL, s, NULL, true);
 
-        if (ivshmem_setup_interrupts(s) < 0) {
-            error_setg(errp, "failed to initialize interrupts");
+        if (ivshmem_setup_interrupts(s, errp) < 0) {
+            error_prepend(errp, "Failed to initialize interrupts: ");
             return;
         }
     }
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index 89f96eb..bf32aca 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -292,7 +292,11 @@ e1000e_init_msix(E1000EState *s)
                         E1000E_MSIX_IDX, E1000E_MSIX_TABLE,
                         &s->msix,
                         E1000E_MSIX_IDX, E1000E_MSIX_PBA,
-                        0xA0);
+                        0xA0, NULL);
+
+    /* Any error other than -ENOTSUP(board's MSI support is broken)
+     * is a programming error. Fall back to INTx silently on -ENOTSUP */
+    assert(!res || res == -ENOTSUP);
 
     if (res < 0) {
         trace_e1000e_msix_init_fail(res);
diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c
index e9d215a..8f829f2 100644
--- a/hw/net/rocker/rocker.c
+++ b/hw/net/rocker/rocker.c
@@ -1256,14 +1256,19 @@ static int rocker_msix_init(Rocker *r)
 {
     PCIDevice *dev = PCI_DEVICE(r);
     int err;
+    Error *local_err = NULL;
 
     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
                     &r->msix_bar,
                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
                     &r->msix_bar,
                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
-                    0);
+                    0, &local_err);
+    /* Any error other than -ENOTSUP(board's MSI support is broken)
+     * is a programming error. */
+    assert(!err || err == -ENOTSUP);
     if (err) {
+        error_report_err(local_err);
         return err;
     }
 
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 92f6af9..a433cc0 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -2191,10 +2191,14 @@ vmxnet3_init_msix(VMXNET3State *s)
                         VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
                         &s->msix_bar,
                         VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s),
-                        VMXNET3_MSIX_OFFSET(s));
+                        VMXNET3_MSIX_OFFSET(s), NULL);
+
+    /* Any error other than -ENOTSUP(board's MSI support is broken)
+     * is a programming error. Fall back to INTx on -ENOTSUP */
+    assert(!res || res == -ENOTSUP);
 
     if (0 > res) {
-        VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
+        VMW_WRPRN("Failed to initialize MSI-X, board's MSI support is broken");
         s->msix_used = false;
     } else {
         if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 0cee631..d03016f 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -21,6 +21,7 @@
 #include "hw/pci/pci.h"
 #include "hw/xen/xen.h"
 #include "qemu/range.h"
+#include "qapi/error.h"
 
 #define MSIX_CAP_LENGTH 12
 
@@ -238,11 +239,29 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
     }
 }
 
-/* Initialize the MSI-X structures */
+/* Make PCI device @dev MSI-X capable
+ * @nentries is the max number of MSI-X vectors that the device support.
+ * @table_bar is the MemoryRegion that MSI-X table structure resides.
+ * @table_bar_nr is number of base address register corresponding to @table_bar.
+ * @table_offset indicates the offset that the MSI-X table structure starts with
+ * in @table_bar.
+ * @pba_bar is the MemoryRegion that the Pending Bit Array structure resides.
+ * @pba_bar_nr is number of base address register corresponding to @pba_bar.
+ * @pba_offset indicates the offset that the Pending Bit Array structure
+ * starts with in @pba_bar.
+ * Non-zero @cap_pos puts capability MSI-X at that offset in PCI config space.
+ * @errp is for returning errors.
+ *
+ * Return 0 on success; set @errp and return -errno on error:
+ * -ENOTSUP means lacking msi support for a msi-capable platform.
+ * -EINVAL means capability overlap, happens when @cap_pos is non-zero,
+ * also means a programming error, except device assignment, which can check
+ * if a real HW is broken.*/
 int msix_init(struct PCIDevice *dev, unsigned short nentries,
               MemoryRegion *table_bar, uint8_t table_bar_nr,
               unsigned table_offset, MemoryRegion *pba_bar,
-              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos)
+              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos,
+              Error **errp)
 {
     int cap;
     unsigned table_size, pba_size;
@@ -250,10 +269,12 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
 
     /* Nothing to do if MSI is not supported by interrupt controller */
     if (!msi_nonbroken) {
+        error_setg(errp, "MSI-X is not supported by interrupt controller");
         return -ENOTSUP;
     }
 
     if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
+        error_setg(errp, "The number of MSI-X vectors is invalid");
         return -EINVAL;
     }
 
@@ -266,10 +287,13 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
         table_offset + table_size > memory_region_size(table_bar) ||
         pba_offset + pba_size > memory_region_size(pba_bar) ||
         (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
+        error_setg(errp, "table & pba overlap, or they don't fit in BARs,"
+                   " or don't align");
         return -EINVAL;
     }
 
-    cap = pci_add_capability(dev, PCI_CAP_ID_MSIX, cap_pos, MSIX_CAP_LENGTH);
+    cap = pci_add_capability2(dev, PCI_CAP_ID_MSIX,
+                              cap_pos, MSIX_CAP_LENGTH, errp);
     if (cap < 0) {
         return cap;
     }
@@ -306,7 +330,7 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
 }
 
 int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
-                            uint8_t bar_nr)
+                            uint8_t bar_nr, Error **errp)
 {
     int ret;
     char *name;
@@ -338,7 +362,7 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
     ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr,
                     0, &dev->msix_exclusive_bar,
                     bar_nr, bar_pba_offset,
-                    0);
+                    0, errp);
     if (ret) {
         return ret;
     }
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 52a4123..fada834 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -2360,9 +2360,12 @@ static void megasas_scsi_realize(PCIDevice *dev, Error **errp)
 
     if (megasas_use_msix(s) &&
         msix_init(dev, 15, &s->mmio_io, b->mmio_bar, 0x2000,
-                  &s->mmio_io, b->mmio_bar, 0x3800, 0x68)) {
+                  &s->mmio_io, b->mmio_bar, 0x3800, 0x68, &err)) {
+        /*TODO: check msix_init's error, and should fail on msix=on */
+        error_report_err(err);
         s->msix = ON_OFF_AUTO_OFF;
     }
+
     if (pci_is_express(dev)) {
         pcie_endpoint_cap_init(dev, 0xa0);
     }
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index eb1dca5..05dc944 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -3703,11 +3703,14 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp)
     }
 
     if (xhci->msix != ON_OFF_AUTO_OFF) {
-        /* TODO check for errors */
-        msix_init(dev, xhci->numintrs,
-                  &xhci->mem, 0, OFF_MSIX_TABLE,
-                  &xhci->mem, 0, OFF_MSIX_PBA,
-                  0x90);
+        /* TODO check for errors, and should fail when msix=on */
+        ret = msix_init(dev, xhci->numintrs,
+                        &xhci->mem, 0, OFF_MSIX_TABLE,
+                        &xhci->mem, 0, OFF_MSIX_PBA,
+                        0x90, &err);
+        if (ret) {
+            error_report_err(err);
+        }
     }
 }
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d7dbe0e..0bcfaad 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1432,6 +1432,7 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp)
 static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
 {
     int ret;
+    Error *local_err = NULL;
 
     vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) *
                                     sizeof(unsigned long));
@@ -1439,12 +1440,15 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
                     vdev->bars[vdev->msix->table_bar].region.mem,
                     vdev->msix->table_bar, vdev->msix->table_offset,
                     vdev->bars[vdev->msix->pba_bar].region.mem,
-                    vdev->msix->pba_bar, vdev->msix->pba_offset, pos);
+                    vdev->msix->pba_bar, vdev->msix->pba_offset, pos,
+                    &local_err);
     if (ret < 0) {
         if (ret == -ENOTSUP) {
+            error_report_err(local_err);
             return 0;
         }
-        error_setg(errp, "msix_init failed");
+
+        error_propagate(errp, local_err);
         return ret;
     }
 
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 97b32fe..2e1766e 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1672,13 +1672,12 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
 
     if (proxy->nvectors) {
         int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors,
-                                          proxy->msix_bar_idx);
+                                          proxy->msix_bar_idx, errp);
+        /* Any error other than -ENOTSUP(board's MSI support is broken)
+         * is a programming error. */
+        assert(!err || err == -ENOTSUP);
         if (err) {
-            /* Notice when a system that supports MSIx can't initialize it.  */
-            if (err != -ENOTSUP) {
-                error_report("unable to init msix vectors to %" PRIu32,
-                             proxy->nvectors);
-            }
+            error_report_err(*errp);
             proxy->nvectors = 0;
         }
     }
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 37/47] megasas: change behaviour of msix switch
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (35 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 36/47] pci: Convert msix_init() to Error and fix callers to check it Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-11  6:56   ` Hannes Reinecke
  2016-11-10 16:14 ` [Qemu-devel] [PULL 38/47] hcd-xhci: " Michael S. Tsirkin
                   ` (10 subsequent siblings)
  47 siblings, 1 reply; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Cao jin, Hannes Reinecke, Paolo Bonzini,
	Markus Armbruster, Marcel Apfelbaum, qemu-block

From: Cao jin <caoj.fnst@cn.fujitsu.com>

Resolve the TODO, msix=auto means msix on; if user specify msix=on,
then device creation fail on msix_init failure.
Also undo the overwrites of user configuration of msix.

CC: Michael S. Tsirkin <mst@redhat.com>
CC: Hannes Reinecke <hare@suse.de>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/scsi/megasas.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index fada834..6cef9a3 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -2353,19 +2353,31 @@ static void megasas_scsi_realize(PCIDevice *dev, Error **errp)
 
     memory_region_init_io(&s->mmio_io, OBJECT(s), &megasas_mmio_ops, s,
                           "megasas-mmio", 0x4000);
+    if (megasas_use_msix(s)) {
+        ret = msix_init(dev, 15, &s->mmio_io, b->mmio_bar, 0x2000,
+                        &s->mmio_io, b->mmio_bar, 0x3800, 0x68, &err);
+        /* Any error other than -ENOTSUP(board's MSI support is broken)
+         * is a programming error */
+        assert(!ret || ret == -ENOTSUP);
+        if (ret && s->msix == ON_OFF_AUTO_ON) {
+            /* Can't satisfy user's explicit msix=on request, fail */
+            error_append_hint(&err, "You have to use msix=auto (default) or "
+                    "msix=off with this machine type.\n");
+            /* No instance_finalize method, need to free the resource here */
+            object_unref(OBJECT(&s->mmio_io));
+            error_propagate(errp, err);
+            return;
+        }
+        assert(!err || s->msix == ON_OFF_AUTO_AUTO);
+        /* With msix=auto, we fall back to MSI off silently */
+        error_free(err);
+    }
+
     memory_region_init_io(&s->port_io, OBJECT(s), &megasas_port_ops, s,
                           "megasas-io", 256);
     memory_region_init_io(&s->queue_io, OBJECT(s), &megasas_queue_ops, s,
                           "megasas-queue", 0x40000);
 
-    if (megasas_use_msix(s) &&
-        msix_init(dev, 15, &s->mmio_io, b->mmio_bar, 0x2000,
-                  &s->mmio_io, b->mmio_bar, 0x3800, 0x68, &err)) {
-        /*TODO: check msix_init's error, and should fail on msix=on */
-        error_report_err(err);
-        s->msix = ON_OFF_AUTO_OFF;
-    }
-
     if (pci_is_express(dev)) {
         pcie_endpoint_cap_init(dev, 0xa0);
     }
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 38/47] hcd-xhci: change behaviour of msix switch
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (36 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 37/47] megasas: change behaviour of msix switch Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-10 16:14 ` [Qemu-devel] [PULL 39/47] megasas: remove unnecessary megasas_use_msix() Michael S. Tsirkin
                   ` (9 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Cao jin, Gerd Hoffmann, Markus Armbruster,
	Marcel Apfelbaum

From: Cao jin <caoj.fnst@cn.fujitsu.com>

Resolve the TODO, msix=auto means msix on; if user specify msix=on,
then device creation fail on msix_init failure.

CC: Gerd Hoffmann <kraxel@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/usb/hcd-xhci.c | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 05dc944..4767045 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -3636,12 +3636,14 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp)
     if (xhci->numintrs < 1) {
         xhci->numintrs = 1;
     }
+
     if (xhci->numslots > MAXSLOTS) {
         xhci->numslots = MAXSLOTS;
     }
     if (xhci->numslots < 1) {
         xhci->numslots = 1;
     }
+
     if (xhci_get_flag(xhci, XHCI_FLAG_ENABLE_STREAMS)) {
         xhci->max_pstreams_mask = 7; /* == 256 primary streams */
     } else {
@@ -3666,6 +3668,28 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp)
     }
 
     memory_region_init(&xhci->mem, OBJECT(xhci), "xhci", LEN_REGS);
+    if (xhci->msix != ON_OFF_AUTO_OFF) {
+        ret = msix_init(dev, xhci->numintrs,
+                        &xhci->mem, 0, OFF_MSIX_TABLE,
+                        &xhci->mem, 0, OFF_MSIX_PBA,
+                        0x90, &err);
+        /* Any error other than -ENOTSUP(board's MSI support is broken)
+         * is a programming error */
+        assert(!ret || ret == -ENOTSUP);
+        if (ret && xhci->msix == ON_OFF_AUTO_ON) {
+            /* Can't satisfy user's explicit msix=on request, fail */
+            error_append_hint(&err, "You have to use msix=auto (default) or "
+                    "msix=off with this machine type.\n");
+            /* No instance_finalize method, need to free the resource here */
+            object_unref(OBJECT(&xhci->mem));
+            error_propagate(errp, err);
+            return;
+        }
+        assert(!err || xhci->msix == ON_OFF_AUTO_AUTO);
+        /* With msix=auto, we fall back to MSI off silently */
+        error_free(err);
+    }
+
     memory_region_init_io(&xhci->mem_cap, OBJECT(xhci), &xhci_cap_ops, xhci,
                           "capabilities", LEN_CAP);
     memory_region_init_io(&xhci->mem_oper, OBJECT(xhci), &xhci_oper_ops, xhci,
@@ -3701,17 +3725,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp)
         ret = pcie_endpoint_cap_init(dev, 0xa0);
         assert(ret >= 0);
     }
-
-    if (xhci->msix != ON_OFF_AUTO_OFF) {
-        /* TODO check for errors, and should fail when msix=on */
-        ret = msix_init(dev, xhci->numintrs,
-                        &xhci->mem, 0, OFF_MSIX_TABLE,
-                        &xhci->mem, 0, OFF_MSIX_PBA,
-                        0x90, &err);
-        if (ret) {
-            error_report_err(err);
-        }
-    }
 }
 
 static void usb_xhci_exit(PCIDevice *dev)
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 39/47] megasas: remove unnecessary megasas_use_msix()
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (37 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 38/47] hcd-xhci: " Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-11  6:57   ` Hannes Reinecke
  2016-11-10 16:14 ` [Qemu-devel] [PULL 40/47] megasas: undo the overwrites of msi user configuration Michael S. Tsirkin
                   ` (8 subsequent siblings)
  47 siblings, 1 reply; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Cao jin, Hannes Reinecke, Paolo Bonzini,
	Markus Armbruster, Marcel Apfelbaum, qemu-block

From: Cao jin <caoj.fnst@cn.fujitsu.com>

Also move certain hunk above, to place msix init related code together.

CC: Hannes Reinecke <hare@suse.de>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/scsi/megasas.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 6cef9a3..ba79e7a 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -155,11 +155,6 @@ static bool megasas_use_queue64(MegasasState *s)
     return s->flags & MEGASAS_MASK_USE_QUEUE64;
 }
 
-static bool megasas_use_msix(MegasasState *s)
-{
-    return s->msix != ON_OFF_AUTO_OFF;
-}
-
 static bool megasas_is_jbod(MegasasState *s)
 {
     return s->flags & MEGASAS_MASK_USE_JBOD;
@@ -2299,9 +2294,7 @@ static void megasas_scsi_uninit(PCIDevice *d)
 {
     MegasasState *s = MEGASAS(d);
 
-    if (megasas_use_msix(s)) {
-        msix_uninit(d, &s->mmio_io, &s->mmio_io);
-    }
+    msix_uninit(d, &s->mmio_io, &s->mmio_io);
     msi_uninit(d);
 }
 
@@ -2353,7 +2346,7 @@ static void megasas_scsi_realize(PCIDevice *dev, Error **errp)
 
     memory_region_init_io(&s->mmio_io, OBJECT(s), &megasas_mmio_ops, s,
                           "megasas-mmio", 0x4000);
-    if (megasas_use_msix(s)) {
+    if (s->msix != ON_OFF_AUTO_OFF) {
         ret = msix_init(dev, 15, &s->mmio_io, b->mmio_bar, 0x2000,
                         &s->mmio_io, b->mmio_bar, 0x3800, 0x68, &err);
         /* Any error other than -ENOTSUP(board's MSI support is broken)
@@ -2373,6 +2366,10 @@ static void megasas_scsi_realize(PCIDevice *dev, Error **errp)
         error_free(err);
     }
 
+    if (msix_enabled(dev)) {
+        msix_vector_use(dev, 0);
+    }
+
     memory_region_init_io(&s->port_io, OBJECT(s), &megasas_port_ops, s,
                           "megasas-io", 256);
     memory_region_init_io(&s->queue_io, OBJECT(s), &megasas_queue_ops, s,
@@ -2388,10 +2385,6 @@ static void megasas_scsi_realize(PCIDevice *dev, Error **errp)
     pci_register_bar(dev, b->mmio_bar, bar_type, &s->mmio_io);
     pci_register_bar(dev, 3, bar_type, &s->queue_io);
 
-    if (megasas_use_msix(s)) {
-        msix_vector_use(dev, 0);
-    }
-
     s->fw_state = MFI_FWSTATE_READY;
     if (!s->sas_addr) {
         s->sas_addr = ((NAA_LOCALLY_ASSIGNED_ID << 24) |
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 40/47] megasas: undo the overwrites of msi user configuration
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (38 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 39/47] megasas: remove unnecessary megasas_use_msix() Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-11  6:57   ` Hannes Reinecke
  2016-11-10 16:14 ` [Qemu-devel] [PULL 41/47] vmxnet3: fix reference leak issue Michael S. Tsirkin
                   ` (7 subsequent siblings)
  47 siblings, 1 reply; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Cao jin, Hannes Reinecke, Paolo Bonzini,
	Markus Armbruster, Marcel Apfelbaum, qemu-block

From: Cao jin <caoj.fnst@cn.fujitsu.com>

Commit afea4e14 seems forgetting to undo the overwrites, which is
unsuitable.

CC: Hannes Reinecke <hare@suse.de>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/scsi/megasas.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index ba79e7a..bbef9e9 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -2337,11 +2337,10 @@ static void megasas_scsi_realize(PCIDevice *dev, Error **errp)
                     "msi=off with this machine type.\n");
             error_propagate(errp, err);
             return;
-        } else if (ret) {
-            /* With msi=auto, we fall back to MSI off silently */
-            s->msi = ON_OFF_AUTO_OFF;
-            error_free(err);
         }
+        assert(!err || s->msix == ON_OFF_AUTO_AUTO);
+        /* With msi=auto, we fall back to MSI off silently */
+        error_free(err);
     }
 
     memory_region_init_io(&s->mmio_io, OBJECT(s), &megasas_mmio_ops, s,
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 41/47] vmxnet3: fix reference leak issue
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (39 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 40/47] megasas: undo the overwrites of msi user configuration Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-10 16:14 ` [Qemu-devel] [PULL 42/47] vmxnet3: remove unnecessary internal msix flag Michael S. Tsirkin
                   ` (6 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Cao jin, Dmitry Fleytman, Jason Wang,
	Markus Armbruster, Marcel Apfelbaum

From: Cao jin <caoj.fnst@cn.fujitsu.com>

On migration target, msix_vector_use() will be called in vmxnet3_post_load()
in second time, without a matching second call to msi_vector_unuse(),
which results in vector reference leak.

CC: Dmitry Fleytman <dmitry@daynix.com>
CC: Jason Wang <jasowang@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/net/vmxnet3.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index a433cc0..45e125e 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -2552,21 +2552,11 @@ static void vmxnet3_put_rxq_descr(QEMUFile *f, void *pv, size_t size)
 static int vmxnet3_post_load(void *opaque, int version_id)
 {
     VMXNET3State *s = opaque;
-    PCIDevice *d = PCI_DEVICE(s);
 
     net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
                     s->max_tx_frags, s->peer_has_vhdr);
     net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
 
-    if (s->msix_used) {
-        if  (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
-            VMW_WRPRN("Failed to re-use MSI-X vectors");
-            msix_uninit(d, &s->msix_bar, &s->msix_bar);
-            s->msix_used = false;
-            return -1;
-        }
-    }
-
     vmxnet3_validate_queues(s);
     vmxnet3_validate_interrupts(s);
 
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 42/47] vmxnet3: remove unnecessary internal msix flag
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (40 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 41/47] vmxnet3: fix reference leak issue Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-10 16:14 ` [Qemu-devel] [PULL 43/47] msi_init: convert assert to return -errno Michael S. Tsirkin
                   ` (5 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Cao jin, Dmitry Fleytman, Jason Wang,
	Markus Armbruster, Marcel Apfelbaum

From: Cao jin <caoj.fnst@cn.fujitsu.com>

Internal flag msix_used is unnecessary, it has the same effect as
msix_enabled().

The corresponding msi flag is already dropped in commit 1070048e.

CC: Dmitry Fleytman <dmitry@daynix.com>
CC: Jason Wang <jasowang@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/net/vmxnet3.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 45e125e..af39965 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -281,8 +281,6 @@ typedef struct {
         Vmxnet3RxqDescr rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
         Vmxnet3TxqDescr txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];
 
-        /* Whether MSI-X support was installed successfully */
-        bool msix_used;
         hwaddr drv_shmem;
         hwaddr temp_shared_guest_driver_memory;
 
@@ -359,7 +357,7 @@ static bool _vmxnet3_assert_interrupt_line(VMXNET3State *s, uint32_t int_idx)
 {
     PCIDevice *d = PCI_DEVICE(s);
 
-    if (s->msix_used && msix_enabled(d)) {
+    if (msix_enabled(d)) {
         VMW_IRPRN("Sending MSI-X notification for vector %u", int_idx);
         msix_notify(d, int_idx);
         return false;
@@ -383,7 +381,7 @@ static void _vmxnet3_deassert_interrupt_line(VMXNET3State *s, int lidx)
      * This function should never be called for MSI(X) interrupts
      * because deassertion never required for message interrupts
      */
-    assert(!s->msix_used || !msix_enabled(d));
+    assert(!msix_enabled(d));
     /*
      * This function should never be called for MSI(X) interrupts
      * because deassertion never required for message interrupts
@@ -421,7 +419,7 @@ static void vmxnet3_trigger_interrupt(VMXNET3State *s, int lidx)
     s->interrupt_states[lidx].is_pending = true;
     vmxnet3_update_interrupt_line_state(s, lidx);
 
-    if (s->msix_used && msix_enabled(d) && s->auto_int_masking) {
+    if (msix_enabled(d) && s->auto_int_masking) {
         goto do_automask;
     }
 
@@ -1428,7 +1426,7 @@ static void vmxnet3_update_features(VMXNET3State *s)
 
 static bool vmxnet3_verify_intx(VMXNET3State *s, int intx)
 {
-    return s->msix_used || msi_enabled(PCI_DEVICE(s))
+    return msix_enabled(PCI_DEVICE(s)) || msi_enabled(PCI_DEVICE(s))
         || intx == pci_get_byte(s->parent_obj.config + PCI_INTERRUPT_PIN) - 1;
 }
 
@@ -1445,18 +1443,18 @@ static void vmxnet3_validate_interrupts(VMXNET3State *s)
     int i;
 
     VMW_CFPRN("Verifying event interrupt index (%d)", s->event_int_idx);
-    vmxnet3_validate_interrupt_idx(s->msix_used, s->event_int_idx);
+    vmxnet3_validate_interrupt_idx(msix_enabled(PCI_DEVICE(s)), s->event_int_idx);
 
     for (i = 0; i < s->txq_num; i++) {
         int idx = s->txq_descr[i].intr_idx;
         VMW_CFPRN("Verifying TX queue %d interrupt index (%d)", i, idx);
-        vmxnet3_validate_interrupt_idx(s->msix_used, idx);
+        vmxnet3_validate_interrupt_idx(msix_enabled(PCI_DEVICE(s)), idx);
     }
 
     for (i = 0; i < s->rxq_num; i++) {
         int idx = s->rxq_descr[i].intr_idx;
         VMW_CFPRN("Verifying RX queue %d interrupt index (%d)", i, idx);
-        vmxnet3_validate_interrupt_idx(s->msix_used, idx);
+        vmxnet3_validate_interrupt_idx(msix_enabled(PCI_DEVICE(s)), idx);
     }
 }
 
@@ -2185,6 +2183,7 @@ vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors)
 static bool
 vmxnet3_init_msix(VMXNET3State *s)
 {
+    bool msix;
     PCIDevice *d = PCI_DEVICE(s);
     int res = msix_init(d, VMXNET3_MAX_INTRS,
                         &s->msix_bar,
@@ -2199,17 +2198,18 @@ vmxnet3_init_msix(VMXNET3State *s)
 
     if (0 > res) {
         VMW_WRPRN("Failed to initialize MSI-X, board's MSI support is broken");
-        s->msix_used = false;
+        msix = false;
     } else {
         if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
             VMW_WRPRN("Failed to use MSI-X vectors, error %d", res);
             msix_uninit(d, &s->msix_bar, &s->msix_bar);
-            s->msix_used = false;
+            msix = false;
         } else {
-            s->msix_used = true;
+            msix = true;
         }
     }
-    return s->msix_used;
+
+    return msix;
 }
 
 static void
@@ -2217,7 +2217,7 @@ vmxnet3_cleanup_msix(VMXNET3State *s)
 {
     PCIDevice *d = PCI_DEVICE(s);
 
-    if (s->msix_used) {
+    if (msix_enabled(d)) {
         vmxnet3_unuse_msix_vectors(s, VMXNET3_MAX_INTRS);
         msix_uninit(d, &s->msix_bar, &s->msix_bar);
     }
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 43/47] msi_init: convert assert to return -errno
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (41 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 42/47] vmxnet3: remove unnecessary internal msix flag Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-10 16:14 ` [Qemu-devel] [PULL 44/47] vhost: adapt vhost_verify_ring_mappings() to virtio 1 ring layout Michael S. Tsirkin
                   ` (4 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Cao jin, Markus Armbruster, Marcel Apfelbaum

From: Cao jin <caoj.fnst@cn.fujitsu.com>

According to the disscussion:
http://lists.nongnu.org/archive/html/qemu-devel/2016-09/msg08215.html

Let leaf function returns reasonable -errno, let caller decide how to
handle the return value.

Suggested-by: Markus Armbruster <armbru@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Acked-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci/msi.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/hw/pci/msi.c b/hw/pci/msi.c
index a87b227..af3efbe 100644
--- a/hw/pci/msi.c
+++ b/hw/pci/msi.c
@@ -201,9 +201,12 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
                    " 64bit %d mask %d\n",
                    offset, nr_vectors, msi64bit, msi_per_vector_mask);
 
-    assert(!(nr_vectors & (nr_vectors - 1)));   /* power of 2 */
-    assert(nr_vectors > 0);
-    assert(nr_vectors <= PCI_MSI_VECTORS_MAX);
+    /* vector sanity test: should in range 1 - 32, should be power of 2 */
+    if (!is_power_of_2(nr_vectors) || nr_vectors > PCI_MSI_VECTORS_MAX) {
+        error_setg(errp, "Invalid vector number: %d", nr_vectors);
+        return -EINVAL;
+    }
+
     /* the nr of MSI vectors is up to 32 */
     vectors_order = ctz32(nr_vectors);
 
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 44/47] vhost: adapt vhost_verify_ring_mappings() to virtio 1 ring layout
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (42 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 43/47] msi_init: convert assert to return -errno Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-10 16:14 ` [Qemu-devel] [PULL 45/47] vhost: drop legacy vring layout bits Michael S. Tsirkin
                   ` (3 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Greg Kurz, qemu-stable, Cornelia Huck

From: Greg Kurz <groug@kaod.org>

With virtio 1, the vring layout is split in 3 separate regions of
contiguous memory for the descriptor table, the available ring and the
used ring, as opposed with legacy virtio which uses a single region.

In case of memory re-mapping, the code ensures it doesn't affect the
vring mapping. This is done in vhost_verify_ring_mappings() which assumes
the device is legacy.

This patch changes vhost_verify_ring_mappings() to check the mappings of
each part of the vring separately.

This works for legacy mappings as well.

Cc: qemu-stable@nongnu.org
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/virtio/vhost.h |  4 +++
 hw/virtio/vhost.c         | 79 +++++++++++++++++++++++++++++++++++------------
 2 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index e433089..56b567f 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -14,6 +14,10 @@ struct vhost_virtqueue {
     void *avail;
     void *used;
     int num;
+    unsigned long long desc_phys;
+    unsigned desc_size;
+    unsigned long long avail_phys;
+    unsigned avail_size;
     unsigned long long used_phys;
     unsigned used_size;
     void *ring;
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index cf122bb..a9c1edb 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -421,32 +421,73 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
     dev->log_size = size;
 }
 
+
+static int vhost_verify_ring_part_mapping(void *part,
+                                          uint64_t part_addr,
+                                          uint64_t part_size,
+                                          uint64_t start_addr,
+                                          uint64_t size)
+{
+    hwaddr l;
+    void *p;
+    int r = 0;
+
+    if (!ranges_overlap(start_addr, size, part_addr, part_size)) {
+        return 0;
+    }
+    l = part_size;
+    p = cpu_physical_memory_map(part_addr, &l, 1);
+    if (!p || l != part_size) {
+        r = -ENOMEM;
+    }
+    if (p != part) {
+        r = -EBUSY;
+    }
+    cpu_physical_memory_unmap(p, l, 0, 0);
+    return r;
+}
+
 static int vhost_verify_ring_mappings(struct vhost_dev *dev,
                                       uint64_t start_addr,
                                       uint64_t size)
 {
-    int i;
+    int i, j;
     int r = 0;
+    const char *part_name[] = {
+        "descriptor table",
+        "available ring",
+        "used ring"
+    };
 
-    for (i = 0; !r && i < dev->nvqs; ++i) {
+    for (i = 0; i < dev->nvqs; ++i) {
         struct vhost_virtqueue *vq = dev->vqs + i;
-        hwaddr l;
-        void *p;
 
-        if (!ranges_overlap(start_addr, size, vq->ring_phys, vq->ring_size)) {
-            continue;
+        j = 0;
+        r = vhost_verify_ring_part_mapping(vq->desc, vq->desc_phys,
+                                           vq->desc_size, start_addr, size);
+        if (!r) {
+            break;
         }
-        l = vq->ring_size;
-        p = cpu_physical_memory_map(vq->ring_phys, &l, 1);
-        if (!p || l != vq->ring_size) {
-            error_report("Unable to map ring buffer for ring %d", i);
-            r = -ENOMEM;
+
+        j++;
+        r = vhost_verify_ring_part_mapping(vq->avail, vq->avail_phys,
+                                           vq->avail_size, start_addr, size);
+        if (!r) {
+            break;
         }
-        if (p != vq->ring) {
-            error_report("Ring buffer relocated for ring %d", i);
-            r = -EBUSY;
+
+        j++;
+        r = vhost_verify_ring_part_mapping(vq->used, vq->used_phys,
+                                           vq->used_size, start_addr, size);
+        if (!r) {
+            break;
         }
-        cpu_physical_memory_unmap(p, l, 0, 0);
+    }
+
+    if (r == -ENOMEM) {
+        error_report("Unable to map %s for ring %d", part_name[j], i);
+    } else if (r == -EBUSY) {
+        error_report("%s relocated for ring %d", part_name[j], i);
     }
     return r;
 }
@@ -860,15 +901,15 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
         }
     }
 
-    s = l = virtio_queue_get_desc_size(vdev, idx);
-    a = virtio_queue_get_desc_addr(vdev, idx);
+    vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx);
+    vq->desc_phys = a = virtio_queue_get_desc_addr(vdev, idx);
     vq->desc = cpu_physical_memory_map(a, &l, 0);
     if (!vq->desc || l != s) {
         r = -ENOMEM;
         goto fail_alloc_desc;
     }
-    s = l = virtio_queue_get_avail_size(vdev, idx);
-    a = virtio_queue_get_avail_addr(vdev, idx);
+    vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx);
+    vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx);
     vq->avail = cpu_physical_memory_map(a, &l, 0);
     if (!vq->avail || l != s) {
         r = -ENOMEM;
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 45/47] vhost: drop legacy vring layout bits
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (43 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 44/47] vhost: adapt vhost_verify_ring_mappings() to virtio 1 ring layout Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-10 16:14 ` [Qemu-devel] [PULL 46/47] virtio: drop virtio_queue_get_ring_{size, addr}() Michael S. Tsirkin
                   ` (2 subsequent siblings)
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Greg Kurz, Cornelia Huck

From: Greg Kurz <groug@kaod.org>

The legacy vring layout is not used anymore as we use the separate
mappings even for legacy devices.
This patch simply removes it.

This also fixes a bug with virtio 1 devices when the vring descriptor table
is mapped at a higher address than the used vring because the following
function may return an insanely great value:

hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
           virtio_queue_get_used_size(vdev, n);
}

and the mapping fails.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/virtio/vhost.h |  3 ---
 hw/virtio/vhost.c         | 13 -------------
 2 files changed, 16 deletions(-)

diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 56b567f..1fe5aad 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -20,9 +20,6 @@ struct vhost_virtqueue {
     unsigned avail_size;
     unsigned long long used_phys;
     unsigned used_size;
-    void *ring;
-    unsigned long long ring_phys;
-    unsigned ring_size;
     EventNotifier masked_notifier;
 };
 
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index a9c1edb..4d52538 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -923,14 +923,6 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
         goto fail_alloc_used;
     }
 
-    vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx);
-    vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx);
-    vq->ring = cpu_physical_memory_map(a, &l, 1);
-    if (!vq->ring || l != s) {
-        r = -ENOMEM;
-        goto fail_alloc_ring;
-    }
-
     r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
     if (r < 0) {
         r = -errno;
@@ -971,9 +963,6 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
 fail_vector:
 fail_kick:
 fail_alloc:
-    cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
-                              0, 0);
-fail_alloc_ring:
     cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
                               0, 0);
 fail_alloc_used:
@@ -1014,8 +1003,6 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
                                                 vhost_vq_index);
     }
 
-    cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
-                              0, virtio_queue_get_ring_size(vdev, idx));
     cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
                               1, virtio_queue_get_used_size(vdev, idx));
     cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 46/47] virtio: drop virtio_queue_get_ring_{size, addr}()
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (44 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 45/47] vhost: drop legacy vring layout bits Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-10 16:14 ` [Qemu-devel] [PULL 47/47] docs: add PCIe devices placement guidelines Michael S. Tsirkin
  2016-11-10 22:29 ` [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Alex Williamson
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Greg Kurz, Cornelia Huck

From: Greg Kurz <groug@kaod.org>

These are not used anymore.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/virtio/virtio.h |  2 --
 hw/virtio/virtio.c         | 11 -----------
 2 files changed, 13 deletions(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index bdb3c4b..5951997 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -259,11 +259,9 @@ typedef struct VirtIORNGConf VirtIORNGConf;
 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n);
-hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n);
-hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n);
 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n);
 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n);
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 72ee06b..55a00cd 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1935,11 +1935,6 @@ hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
     return vdev->vq[n].vring.used;
 }
 
-hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
-{
-    return vdev->vq[n].vring.desc;
-}
-
 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
 {
     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
@@ -1957,12 +1952,6 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
 }
 
-hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
-{
-    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
-	    virtio_queue_get_used_size(vdev, n);
-}
-
 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
 {
     return vdev->vq[n].last_avail_idx;
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [Qemu-devel] [PULL 47/47] docs: add PCIe devices placement guidelines
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (45 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 46/47] virtio: drop virtio_queue_get_ring_{size, addr}() Michael S. Tsirkin
@ 2016-11-10 16:14 ` Michael S. Tsirkin
  2016-11-10 22:29 ` [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Alex Williamson
  47 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 16:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Marcel Apfelbaum, Laszlo Ersek

From: Marcel Apfelbaum <marcel@redhat.com>

Proposes best practices on how to use PCI Express/PCI device
in PCI Express based machines and explain the reasoning behind them.

Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 docs/pcie.txt | 310 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 310 insertions(+)
 create mode 100644 docs/pcie.txt

diff --git a/docs/pcie.txt b/docs/pcie.txt
new file mode 100644
index 0000000..9fb20aa
--- /dev/null
+++ b/docs/pcie.txt
@@ -0,0 +1,310 @@
+PCI EXPRESS GUIDELINES
+======================
+
+1. Introduction
+================
+The doc proposes best practices on how to use PCI Express/PCI device
+in PCI Express based machines and explains the reasoning behind them.
+
+The following presentations accompany this document:
+ (1) Q35 overview.
+     http://wiki.qemu.org/images/4/4e/Q35.pdf
+ (2) A comparison between PCI and PCI Express technologies.
+     http://wiki.qemu.org/images/f/f6/PCIvsPCIe.pdf
+
+Note: The usage examples are not intended to replace the full
+documentation, please use QEMU help to retrieve all options.
+
+2. Device placement strategy
+============================
+QEMU does not have a clear socket-device matching mechanism
+and allows any PCI/PCI Express device to be plugged into any
+PCI/PCI Express slot.
+Plugging a PCI device into a PCI Express slot might not always work and
+is weird anyway since it cannot be done for "bare metal".
+Plugging a PCI Express device into a PCI slot will hide the Extended
+Configuration Space thus is also not recommended.
+
+The recommendation is to separate the PCI Express and PCI hierarchies.
+PCI Express devices should be plugged only into PCI Express Root Ports and
+PCI Express Downstream ports.
+
+2.1 Root Bus (pcie.0)
+=====================
+Place only the following kinds of devices directly on the Root Complex:
+    (1) PCI Devices (e.g. network card, graphics card, IDE controller),
+        not controllers. Place only legacy PCI devices on
+        the Root Complex. These will be considered Integrated Endpoints.
+        Note: Integrated Endpoints are not hot-pluggable.
+
+        Although the PCI Express spec does not forbid PCI Express devices as
+        Integrated Endpoints, existing hardware mostly integrates legacy PCI
+        devices with the Root Complex. Guest OSes are suspected to behave
+        strangely when PCI Express devices are integrated
+        with the Root Complex.
+
+    (2) PCI Express Root Ports (ioh3420), for starting exclusively PCI Express
+        hierarchies.
+
+    (3) DMI-PCI Bridges (i82801b11-bridge), for starting legacy PCI
+        hierarchies.
+
+    (4) Extra Root Complexes (pxb-pcie), if multiple PCI Express Root Buses
+        are needed.
+
+   pcie.0 bus
+   ----------------------------------------------------------------------------
+        |                |                    |                  |
+   -----------   ------------------   ------------------   --------------
+   | PCI Dev |   | PCIe Root Port |   | DMI-PCI Bridge |   |  pxb-pcie  |
+   -----------   ------------------   ------------------   --------------
+
+2.1.1 To plug a device into pcie.0 as a Root Complex Integrated Endpoint use:
+          -device <dev>[,bus=pcie.0]
+2.1.2 To expose a new PCI Express Root Bus use:
+          -device pxb-pcie,id=pcie.1,bus_nr=x[,numa_node=y][,addr=z]
+      Only PCI Express Root Ports and DMI-PCI bridges can be connected
+      to the pcie.1 bus:
+          -device ioh3420,id=root_port1[,bus=pcie.1][,chassis=x][,slot=y][,addr=z]                                     \
+          -device i82801b11-bridge,id=dmi_pci_bridge1,bus=pcie.1
+
+
+2.2 PCI Express only hierarchy
+==============================
+Always use PCI Express Root Ports to start PCI Express hierarchies.
+
+A PCI Express Root bus supports up to 32 devices. Since each
+PCI Express Root Port is a function and a multi-function
+device may support up to 8 functions, the maximum possible
+number of PCI Express Root Ports per PCI Express Root Bus is 256.
+
+Prefer grouping PCI Express Root Ports into multi-function devices
+to keep a simple flat hierarchy that is enough for most scenarios.
+Only use PCI Express Switches (x3130-upstream, xio3130-downstream)
+if there is no more room for PCI Express Root Ports.
+Please see section 4. for further justifications.
+
+Plug only PCI Express devices into PCI Express Ports.
+
+
+   pcie.0 bus
+   ----------------------------------------------------------------------------------
+        |                 |                                    |
+   -------------    -------------                        -------------
+   | Root Port |    | Root Port |                        | Root Port |
+   ------------     -------------                        -------------
+         |                            -------------------------|------------------------
+    ------------                      |                 -----------------              |
+    | PCIe Dev |                      |    PCI Express  | Upstream Port |              |
+    ------------                      |      Switch     -----------------              |
+                                      |                  |            |                |
+                                      |    -------------------    -------------------  |
+                                      |    | Downstream Port |    | Downstream Port |  |
+                                      |    -------------------    -------------------  |
+                                      -------------|-----------------------|------------
+                                             ------------
+                                             | PCIe Dev |
+                                             ------------
+
+2.2.1 Plugging a PCI Express device into a PCI Express Root Port:
+          -device ioh3420,id=root_port1,chassis=x,slot=y[,bus=pcie.0][,addr=z]  \
+          -device <dev>,bus=root_port1
+2.2.2 Using multi-function PCI Express Root Ports:
+      -device ioh3420,id=root_port1,multifunction=on,chassis=x,slot=y[,bus=pcie.0][,addr=z.0] \
+      -device ioh3420,id=root_port2,chassis=x1,slot=y1[,bus=pcie.0][,addr=z.1] \
+      -device ioh3420,id=root_port3,chassis=x2,slot=y2[,bus=pcie.0][,addr=z.2] \
+2.2.2 Plugging a PCI Express device into a Switch:
+      -device ioh3420,id=root_port1,chassis=x,slot=y[,bus=pcie.0][,addr=z]  \
+      -device x3130-upstream,id=upstream_port1,bus=root_port1[,addr=x]          \
+      -device xio3130-downstream,id=downstream_port1,bus=upstream_port1,chassis=x1,slot=y1[,addr=z1]] \
+      -device <dev>,bus=downstream_port1
+
+Notes:
+  - (slot, chassis) pair is mandatory and must be
+     unique for each PCI Express Root Port.
+  - 'addr' parameter can be 0 for all the examples above.
+
+
+2.3 PCI only hierarchy
+======================
+Legacy PCI devices can be plugged into pcie.0 as Integrated Endpoints,
+but, as mentioned in section 5, doing so means the legacy PCI
+device in question will be incapable of hot-unplugging.
+Besides that use DMI-PCI Bridges (i82801b11-bridge) in combination
+with PCI-PCI Bridges (pci-bridge) to start PCI hierarchies.
+
+Prefer flat hierarchies. For most scenarios a single DMI-PCI Bridge
+(having 32 slots) and several PCI-PCI Bridges attached to it
+(each supporting also 32 slots) will support hundreds of legacy devices.
+The recommendation is to populate one PCI-PCI Bridge under the DMI-PCI Bridge
+until is full and then plug a new PCI-PCI Bridge...
+
+   pcie.0 bus
+   ----------------------------------------------
+        |                            |
+   -----------               ------------------
+   | PCI Dev |               | DMI-PCI BRIDGE |
+   ----------                ------------------
+                               |            |
+                  ------------------    ------------------
+                  | PCI-PCI Bridge |    | PCI-PCI Bridge |   ...
+                  ------------------    ------------------
+                                         |           |
+                                  -----------     -----------
+                                  | PCI Dev |     | PCI Dev |
+                                  -----------     -----------
+
+2.3.1 To plug a PCI device into pcie.0 as an Integrated Endpoint use:
+      -device <dev>[,bus=pcie.0]
+2.3.2 Plugging a PCI device into a PCI-PCI Bridge:
+      -device i82801b11-bridge,id=dmi_pci_bridge1[,bus=pcie.0]                        \
+      -device pci-bridge,id=pci_bridge1,bus=dmi_pci_bridge1[,chassis_nr=x][,addr=y]   \
+      -device <dev>,bus=pci_bridge1[,addr=x]
+      Note that 'addr' cannot be 0 unless shpc=off parameter is passed to
+      the PCI Bridge.
+
+3. IO space issues
+===================
+The PCI Express Root Ports and PCI Express Downstream ports are seen by
+Firmware/Guest OS as PCI-PCI Bridges. As required by the PCI spec, each
+such Port should be reserved a 4K IO range for, even though only one
+(multifunction) device can be plugged into each Port. This results in
+poor IO space utilization.
+
+The firmware used by QEMU (SeaBIOS/OVMF) may try further optimizations
+by not allocating IO space for each PCI Express Root / PCI Express
+Downstream port if:
+    (1) the port is empty, or
+    (2) the device behind the port has no IO BARs.
+
+The IO space is very limited, to 65536 byte-wide IO ports, and may even be
+fragmented by fixed IO ports owned by platform devices resulting in at most
+10 PCI Express Root Ports or PCI Express Downstream Ports per system
+if devices with IO BARs are used in the PCI Express hierarchy. Using the
+proposed device placing strategy solves this issue by using only
+PCI Express devices within PCI Express hierarchy.
+
+The PCI Express spec requires that PCI Express devices work properly
+without using IO ports. The PCI hierarchy has no such limitations.
+
+
+4. Bus numbers issues
+======================
+Each PCI domain can have up to only 256 buses and the QEMU PCI Express
+machines do not support multiple PCI domains even if extra Root
+Complexes (pxb-pcie) are used.
+
+Each element of the PCI Express hierarchy (Root Complexes,
+PCI Express Root Ports, PCI Express Downstream/Upstream ports)
+uses one bus number. Since only one (multifunction) device
+can be attached to a PCI Express Root Port or PCI Express Downstream
+Port it is advised to plan in advance for the expected number of
+devices to prevent bus number starvation.
+
+Avoiding PCI Express Switches (and thereby striving for a 'flatter' PCI
+Express hierarchy) enables the hierarchy to not spend bus numbers on
+Upstream Ports.
+
+The bus_nr properties of the pxb-pcie devices partition the 0..255 bus
+number space. All bus numbers assigned to the buses recursively behind a
+given pxb-pcie device's root bus must fit between the bus_nr property of
+that pxb-pcie device, and the lowest of the higher bus_nr properties
+that the command line sets for other pxb-pcie devices.
+
+
+5. Hot-plug
+============
+The PCI Express root buses (pcie.0 and the buses exposed by pxb-pcie devices)
+do not support hot-plug, so any devices plugged into Root Complexes
+cannot be hot-plugged/hot-unplugged:
+    (1) PCI Express Integrated Endpoints
+    (2) PCI Express Root Ports
+    (3) DMI-PCI Bridges
+    (4) pxb-pcie
+
+Be aware that PCI Express Downstream Ports can't be hot-plugged into
+an existing PCI Express Upstream Port.
+
+PCI devices can be hot-plugged into PCI-PCI Bridges. The PCI hot-plug is ACPI
+based and can work side by side with the PCI Express native hot-plug.
+
+PCI Express devices can be natively hot-plugged/hot-unplugged into/from
+PCI Express Root Ports (and PCI Express Downstream Ports).
+
+5.1 Planning for hot-plug:
+    (1) PCI hierarchy
+        Leave enough PCI-PCI Bridge slots empty or add one
+        or more empty PCI-PCI Bridges to the DMI-PCI Bridge.
+
+        For each such PCI-PCI Bridge the Guest Firmware is expected to reserve
+        4K IO space and 2M MMIO range to be used for all devices behind it.
+
+        Because of the hard IO limit of around 10 PCI Bridges (~ 40K space)
+        per system don't use more than 9 PCI-PCI Bridges, leaving 4K for the
+        Integrated Endpoints. (The PCI Express Hierarchy needs no IO space).
+
+    (2) PCI Express hierarchy:
+        Leave enough PCI Express Root Ports empty. Use multifunction
+        PCI Express Root Ports (up to 8 ports per pcie.0 slot)
+        on the Root Complex(es), for keeping the
+        hierarchy as flat as possible, thereby saving PCI bus numbers.
+        Don't use PCI Express Switches if you don't have
+        to, each one of those uses an extra PCI bus (for its Upstream Port)
+        that could be put to better use with another Root Port or Downstream
+        Port, which may come handy for hot-plugging another device.
+
+
+5.3 Hot-plug example:
+Using HMP: (add -monitor stdio to QEMU command line)
+  device_add <dev>,id=<id>,bus=<PCI Express Root Port Id/PCI Express Downstream Port Id/PCI-PCI Bridge Id/>
+
+
+6. Device assignment
+====================
+Host devices are mostly PCI Express and should be plugged only into
+PCI Express Root Ports or PCI Express Downstream Ports.
+PCI-PCI Bridge slots can be used for legacy PCI host devices.
+
+6.1 How to detect if a device is PCI Express:
+  > lspci -s 03:00.0 -v (as root)
+
+    03:00.0 Network controller: Intel Corporation Wireless 7260 (rev 83)
+    Subsystem: Intel Corporation Dual Band Wireless-AC 7260
+    Flags: bus master, fast devsel, latency 0, IRQ 50
+    Memory at f0400000 (64-bit, non-prefetchable) [size=8K]
+    Capabilities: [c8] Power Management version 3
+    Capabilities: [d0] MSI: Enable+ Count=1/1 Maskable- 64bit+
+    Capabilities: [40] Express Endpoint, MSI 00
+    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    Capabilities: [100] Advanced Error Reporting
+    Capabilities: [140] Device Serial Number 7c-7a-91-ff-ff-90-db-20
+    Capabilities: [14c] Latency Tolerance Reporting
+    Capabilities: [154] Vendor Specific Information: ID=cafe Rev=1 Len=014 
+
+If you can see the "Express Endpoint" capability in the
+output, then the device is indeed PCI Express.
+
+
+7. Virtio devices
+=================
+Virtio devices plugged into the PCI hierarchy or as Integrated Endpoints
+will remain PCI and have transitional behaviour as default.
+Transitional virtio devices work in both IO and MMIO modes depending on
+the guest support. The Guest firmware will assign both IO and MMIO resources
+to transitional virtio devices.
+
+Virtio devices plugged into PCI Express ports are PCI Express devices and
+have "1.0" behavior by default without IO support.
+In both cases disable-legacy and disable-modern properties can be used
+to override the behaviour.
+
+Note that setting disable-legacy=off will enable legacy mode (enabling
+legacy behavior) for PCI Express virtio devices causing them to
+require IO space, which, given the limited available IO space, may quickly
+lead to resource exhaustion, and is therefore strongly discouraged.
+
+
+8. Conclusion
+==============
+The proposal offers a usage model that is easy to understand and follow
+and at the same time overcomes the PCI Express architecture limitations.
-- 
MST

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 05/47] contrib: add libvhost-user
  2016-11-10 16:12 ` [Qemu-devel] [PULL 05/47] contrib: add libvhost-user Michael S. Tsirkin
@ 2016-11-10 17:05   ` Daniel P. Berrange
  2016-11-10 17:20     ` Michael S. Tsirkin
  0 siblings, 1 reply; 67+ messages in thread
From: Daniel P. Berrange @ 2016-11-10 17:05 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: qemu-devel, Peter Maydell, Marc-André Lureau,
	Felipe Franciosi, Paolo Bonzini, Stefan Hajnoczi, Eric Blake,
	Markus Armbruster

On Thu, Nov 10, 2016 at 06:12:03PM +0200, Michael S. Tsirkin wrote:
> From: Marc-André Lureau <marcandre.lureau@redhat.com>
> 
> Add a library to help implementing vhost-user backend (or slave).
> 
> Dealing with vhost-user as an application developer isn't so easy: you
> have all the trouble with any protocol: validation, unix ancillary data,
> shared memory, eventfd, logging, and on top of that you need to deal
> with virtio queues, if possible efficiently.
> 
> qemu test has a nice vhost-user testing application vhost-user-bridge,
> which implements most of vhost-user, and virtio.c which implements
> virtqueues manipulation. Based on these two, I tried to make a simple
> library, reusable for tests or development of new vhost-user scenarios.

By "new vhost-user scenarios" does this mean facilitating creation of
vhost user clients by out of tree 3rd parties ?

Unless I'm missing something, the change doesn't actually build any
.so of this code, so only thing that can use it right now is other
QEMU code.

If we want 3rd parties to use this, then it seems to me that LGPLv2+ or
even a BSD like license would be more applicable than GPLv2+. Also we
would need to state what API/ABI stability guarantees we're going to
provide and create a versioned ELF .so.

In general this patch looks like it is something to propose in 2.9,
rather than pushing feature work in post-freeze.

Regards,
Daniel
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://entangle-photo.org       -o-    http://search.cpan.org/~danberr/ :|

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 05/47] contrib: add libvhost-user
  2016-11-10 17:05   ` Daniel P. Berrange
@ 2016-11-10 17:20     ` Michael S. Tsirkin
  0 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 17:20 UTC (permalink / raw)
  To: Daniel P. Berrange
  Cc: qemu-devel, Peter Maydell, Marc-André Lureau,
	Felipe Franciosi, Paolo Bonzini, Stefan Hajnoczi, Eric Blake,
	Markus Armbruster

On Thu, Nov 10, 2016 at 05:05:03PM +0000, Daniel P. Berrange wrote:
> On Thu, Nov 10, 2016 at 06:12:03PM +0200, Michael S. Tsirkin wrote:
> > From: Marc-André Lureau <marcandre.lureau@redhat.com>
> > 
> > Add a library to help implementing vhost-user backend (or slave).
> > 
> > Dealing with vhost-user as an application developer isn't so easy: you
> > have all the trouble with any protocol: validation, unix ancillary data,
> > shared memory, eventfd, logging, and on top of that you need to deal
> > with virtio queues, if possible efficiently.
> > 
> > qemu test has a nice vhost-user testing application vhost-user-bridge,
> > which implements most of vhost-user, and virtio.c which implements
> > virtqueues manipulation. Based on these two, I tried to make a simple
> > library, reusable for tests or development of new vhost-user scenarios.
> 
> By "new vhost-user scenarios" does this mean facilitating creation of
> vhost user clients by out of tree 3rd parties ?

Just more tests at this point.

> Unless I'm missing something, the change doesn't actually build any
> .so of this code, so only thing that can use it right now is other
> QEMU code.
> 
> If we want 3rd parties to use this, then it seems to me that LGPLv2+ or
> even a BSD like license would be more applicable than GPLv2+. Also we
> would need to state what API/ABI stability guarantees we're going to
> provide and create a versioned ELF .so.

We can discuss this when someone tries to do this.

> In general this patch looks like it is something to propose in 2.9,
> rather than pushing feature work in post-freeze.
> 
> Regards,
> Daniel

>From my perspective, anything that helps find failures
at user systems during build and not when
running guests is a plus.

> -- 
> |: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
> |: http://libvirt.org              -o-             http://virt-manager.org :|
> |: http://entangle-photo.org       -o-    http://search.cpan.org/~danberr/ :|

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
  2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
                   ` (46 preceding siblings ...)
  2016-11-10 16:14 ` [Qemu-devel] [PULL 47/47] docs: add PCIe devices placement guidelines Michael S. Tsirkin
@ 2016-11-10 22:29 ` Alex Williamson
  2016-11-10 22:33   ` Michael S. Tsirkin
  47 siblings, 1 reply; 67+ messages in thread
From: Alex Williamson @ 2016-11-10 22:29 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: qemu-devel, Peter Maydell, Stefan Hajnoczi

On Thu, 10 Nov 2016 18:12:20 +0200
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> The following changes since commit 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6:
> 
>   MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 +0000)
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> 
> for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17:
> 
>   docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 +0200)
> 
> ----------------------------------------------------------------
> virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
> 
> Lots of fixes all over the place.  I allowed some cleanups in even though they
> are not strictly bugfixes, they might prevent bugs and seem very safe.
> 
> Most importantly, this fixes a regression with vhost introduced
> by the last pull.

I think I'm hitting this previous regression, I see this in my libvirt
log:

kvm_mem_ioeventfd_add: error adding ioeventfd: File exists

And a backtrace as seen here
https://paste.fedoraproject.org/477562/88144131/

So I merge this tag to try to resolve it, now I get qemu segfaulting
bisected to:

commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978
Author: Cao jin <caoj.fnst@cn.fujitsu.com>
Date:   Sat Nov 5 10:07:21 2016 +0800

    hcd-xhci: check & correct param before using it

And if I revert that, vhost still fails with:

qemu-system-x86_64: /net/gimli/home/alwillia/Work/qemu.git/memory.c:2012: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed.

So I think this does not fix the previous regression and adds a new
one :-\

Thanks,
Alex

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
  2016-11-10 22:29 ` [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Alex Williamson
@ 2016-11-10 22:33   ` Michael S. Tsirkin
  2016-11-10 22:48     ` Alex Williamson
  0 siblings, 1 reply; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 22:33 UTC (permalink / raw)
  To: Alex Williamson
  Cc: qemu-devel, Peter Maydell, Stefan Hajnoczi, Cao jin,
	Paolo Bonzini, Felipe Franciosi

On Thu, Nov 10, 2016 at 03:29:21PM -0700, Alex Williamson wrote:
> On Thu, 10 Nov 2016 18:12:20 +0200
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > The following changes since commit 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6:
> > 
> >   MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 +0000)
> > 
> > are available in the git repository at:
> > 
> >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> > 
> > for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17:
> > 
> >   docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 +0200)
> > 
> > ----------------------------------------------------------------
> > virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
> > 
> > Lots of fixes all over the place.  I allowed some cleanups in even though they
> > are not strictly bugfixes, they might prevent bugs and seem very safe.
> > 
> > Most importantly, this fixes a regression with vhost introduced
> > by the last pull.
> 
> I think I'm hitting this previous regression, I see this in my libvirt
> log:
> 
> kvm_mem_ioeventfd_add: error adding ioeventfd: File exists
> 
> And a backtrace as seen here
> https://paste.fedoraproject.org/477562/88144131/
> 
> So I merge this tag to try to resolve it, now I get qemu segfaulting
> bisected to:
> 
> commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978
> Author: Cao jin <caoj.fnst@cn.fujitsu.com>
> Date:   Sat Nov 5 10:07:21 2016 +0800
> 
>     hcd-xhci: check & correct param before using it
> 
> And if I revert that, vhost still fails with:
> 
> qemu-system-x86_64: /net/gimli/home/alwillia/Work/qemu.git/memory.c:2012: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed.
> 
> So I think this does not fix the previous regression and adds a new
> one :-\
> 
> Thanks,
> Alex


Thanks!
Stefan, pls defer merging while we investigate.

Cc a bunch of relevant people.

-- 
MST

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
  2016-11-10 22:33   ` Michael S. Tsirkin
@ 2016-11-10 22:48     ` Alex Williamson
  2016-11-10 22:51       ` Michael S. Tsirkin
  2016-11-10 23:09       ` Michael S. Tsirkin
  0 siblings, 2 replies; 67+ messages in thread
From: Alex Williamson @ 2016-11-10 22:48 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: qemu-devel, Peter Maydell, Stefan Hajnoczi, Cao jin,
	Paolo Bonzini, Felipe Franciosi

On Fri, 11 Nov 2016 00:33:17 +0200
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Thu, Nov 10, 2016 at 03:29:21PM -0700, Alex Williamson wrote:
> > On Thu, 10 Nov 2016 18:12:20 +0200
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >   
> > > The following changes since commit 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6:
> > > 
> > >   MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 +0000)
> > > 
> > > are available in the git repository at:
> > > 
> > >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> > > 
> > > for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17:
> > > 
> > >   docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 +0200)
> > > 
> > > ----------------------------------------------------------------
> > > virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
> > > 
> > > Lots of fixes all over the place.  I allowed some cleanups in even though they
> > > are not strictly bugfixes, they might prevent bugs and seem very safe.
> > > 
> > > Most importantly, this fixes a regression with vhost introduced
> > > by the last pull.  
> > 
> > I think I'm hitting this previous regression, I see this in my libvirt
> > log:
> > 
> > kvm_mem_ioeventfd_add: error adding ioeventfd: File exists
> > 
> > And a backtrace as seen here
> > https://paste.fedoraproject.org/477562/88144131/
> > 
> > So I merge this tag to try to resolve it, now I get qemu segfaulting
> > bisected to:
> > 
> > commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978
> > Author: Cao jin <caoj.fnst@cn.fujitsu.com>
> > Date:   Sat Nov 5 10:07:21 2016 +0800
> > 
> >     hcd-xhci: check & correct param before using it

Here's the backtrace from this one:

#0  0x0000555555a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824
#1  0x0000555555a9f092 in xhci_port_notify (port=0x7fffbfb39330, bits=131072)
    at hw/usb/hcd-xhci.c:2870
#2  0x0000555555a9f221 in xhci_port_update (port=0x7fffbfb39330, is_detach=0)
    at hw/usb/hcd-xhci.c:2905
#3  0x0000555555a9f435 in xhci_reset (dev=0x7fffbfb38010)
    at hw/usb/hcd-xhci.c:2961
#4  0x0000555555984a2e in device_reset (dev=0x7fffbfb38010)
    at hw/core/qdev.c:1145
#5  0x00005555559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0)
    at hw/core/qdev.c:295
#6  0x00005555559834df in qdev_walk_children (dev=0x7fffbfb38010, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:610
#7  0x000055555598763b in qbus_walk_children (bus=0x555556a40f30, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
#8  0x00005555559834a3 in qdev_walk_children (dev=0x555556a3f220, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:602
#9  0x000055555598763b in qbus_walk_children (bus=0x5555568296c0, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
#10 0x00005555559829f5 in qbus_reset_all (bus=0x5555568296c0)
    at hw/core/qdev.c:321
#11 0x0000555555982a18 in qbus_reset_all_fn (opaque=0x5555568296c0)
    at hw/core/qdev.c:327
#12 0x00005555558e76f0 in qemu_devices_reset () at vl.c:1765
#13 0x00005555558367ce in pc_machine_reset ()
    at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181
#14 0x00005555558e778d in qemu_system_reset (report=false) at vl.c:1778
#15 0x00005555558ef44b in main (argc=50, argv=0x7fffffffdf48, envp=0x7fffffffe0e0) at vl.c:4656

Here's a commandline sufficient to trigger it:

qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor stdio -device nec-usb-xhci

So apparently that never got tested or something got mangled in the
commit.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
  2016-11-10 22:48     ` Alex Williamson
@ 2016-11-10 22:51       ` Michael S. Tsirkin
  2016-11-11  2:46         ` Cao jin
  2016-11-10 23:09       ` Michael S. Tsirkin
  1 sibling, 1 reply; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 22:51 UTC (permalink / raw)
  To: Alex Williamson
  Cc: qemu-devel, Peter Maydell, Stefan Hajnoczi, Cao jin,
	Paolo Bonzini, Felipe Franciosi

On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote:
> On Fri, 11 Nov 2016 00:33:17 +0200
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Thu, Nov 10, 2016 at 03:29:21PM -0700, Alex Williamson wrote:
> > > On Thu, 10 Nov 2016 18:12:20 +0200
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > >   
> > > > The following changes since commit 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6:
> > > > 
> > > >   MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 +0000)
> > > > 
> > > > are available in the git repository at:
> > > > 
> > > >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> > > > 
> > > > for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17:
> > > > 
> > > >   docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 +0200)
> > > > 
> > > > ----------------------------------------------------------------
> > > > virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
> > > > 
> > > > Lots of fixes all over the place.  I allowed some cleanups in even though they
> > > > are not strictly bugfixes, they might prevent bugs and seem very safe.
> > > > 
> > > > Most importantly, this fixes a regression with vhost introduced
> > > > by the last pull.  
> > > 
> > > I think I'm hitting this previous regression, I see this in my libvirt
> > > log:
> > > 
> > > kvm_mem_ioeventfd_add: error adding ioeventfd: File exists
> > > 
> > > And a backtrace as seen here
> > > https://paste.fedoraproject.org/477562/88144131/
> > > 
> > > So I merge this tag to try to resolve it, now I get qemu segfaulting
> > > bisected to:
> > > 
> > > commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978
> > > Author: Cao jin <caoj.fnst@cn.fujitsu.com>
> > > Date:   Sat Nov 5 10:07:21 2016 +0800
> > > 
> > >     hcd-xhci: check & correct param before using it
> 
> Here's the backtrace from this one:
> 
> #0  0x0000555555a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824
> #1  0x0000555555a9f092 in xhci_port_notify (port=0x7fffbfb39330, bits=131072)
>     at hw/usb/hcd-xhci.c:2870
> #2  0x0000555555a9f221 in xhci_port_update (port=0x7fffbfb39330, is_detach=0)
>     at hw/usb/hcd-xhci.c:2905
> #3  0x0000555555a9f435 in xhci_reset (dev=0x7fffbfb38010)
>     at hw/usb/hcd-xhci.c:2961
> #4  0x0000555555984a2e in device_reset (dev=0x7fffbfb38010)
>     at hw/core/qdev.c:1145
> #5  0x00005555559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0)
>     at hw/core/qdev.c:295
> #6  0x00005555559834df in qdev_walk_children (dev=0x7fffbfb38010, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:610
> #7  0x000055555598763b in qbus_walk_children (bus=0x555556a40f30, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
> #8  0x00005555559834a3 in qdev_walk_children (dev=0x555556a3f220, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:602
> #9  0x000055555598763b in qbus_walk_children (bus=0x5555568296c0, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
> #10 0x00005555559829f5 in qbus_reset_all (bus=0x5555568296c0)
>     at hw/core/qdev.c:321
> #11 0x0000555555982a18 in qbus_reset_all_fn (opaque=0x5555568296c0)
>     at hw/core/qdev.c:327
> #12 0x00005555558e76f0 in qemu_devices_reset () at vl.c:1765
> #13 0x00005555558367ce in pc_machine_reset ()
>     at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181
> #14 0x00005555558e778d in qemu_system_reset (report=false) at vl.c:1778
> #15 0x00005555558ef44b in main (argc=50, argv=0x7fffffffdf48, envp=0x7fffffffe0e0) at vl.c:4656
> 
> Here's a commandline sufficient to trigger it:
> 
> qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor stdio -device nec-usb-xhci
> 
> So apparently that never got tested or something got mangled in the
> commit.  Thanks,
> 
> Alex

I think I'll drop this, this patchset was borderline useful anyway.

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
  2016-11-10 22:48     ` Alex Williamson
  2016-11-10 22:51       ` Michael S. Tsirkin
@ 2016-11-10 23:09       ` Michael S. Tsirkin
  2016-11-10 23:44         ` Alex Williamson
  1 sibling, 1 reply; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-10 23:09 UTC (permalink / raw)
  To: Alex Williamson
  Cc: qemu-devel, Peter Maydell, Stefan Hajnoczi, Cao jin,
	Paolo Bonzini, Felipe Franciosi

On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote:
> On Fri, 11 Nov 2016 00:33:17 +0200
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Thu, Nov 10, 2016 at 03:29:21PM -0700, Alex Williamson wrote:
> > > On Thu, 10 Nov 2016 18:12:20 +0200
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > >   
> > > > The following changes since commit 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6:
> > > > 
> > > >   MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 +0000)
> > > > 
> > > > are available in the git repository at:
> > > > 
> > > >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> > > > 
> > > > for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17:
> > > > 
> > > >   docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 +0200)
> > > > 
> > > > ----------------------------------------------------------------
> > > > virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
> > > > 
> > > > Lots of fixes all over the place.  I allowed some cleanups in even though they
> > > > are not strictly bugfixes, they might prevent bugs and seem very safe.
> > > > 
> > > > Most importantly, this fixes a regression with vhost introduced
> > > > by the last pull.  
> > > 
> > > I think I'm hitting this previous regression, I see this in my libvirt
> > > log:
> > > 
> > > kvm_mem_ioeventfd_add: error adding ioeventfd: File exists
> > > 
> > > And a backtrace as seen here
> > > https://paste.fedoraproject.org/477562/88144131/
> > > 
> > > So I merge this tag to try to resolve it, now I get qemu segfaulting
> > > bisected to:
> > > 
> > > commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978
> > > Author: Cao jin <caoj.fnst@cn.fujitsu.com>
> > > Date:   Sat Nov 5 10:07:21 2016 +0800
> > > 
> > >     hcd-xhci: check & correct param before using it
> 
> Here's the backtrace from this one:
> 
> #0  0x0000555555a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824
> #1  0x0000555555a9f092 in xhci_port_notify (port=0x7fffbfb39330, bits=131072)
>     at hw/usb/hcd-xhci.c:2870
> #2  0x0000555555a9f221 in xhci_port_update (port=0x7fffbfb39330, is_detach=0)
>     at hw/usb/hcd-xhci.c:2905
> #3  0x0000555555a9f435 in xhci_reset (dev=0x7fffbfb38010)
>     at hw/usb/hcd-xhci.c:2961
> #4  0x0000555555984a2e in device_reset (dev=0x7fffbfb38010)
>     at hw/core/qdev.c:1145
> #5  0x00005555559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0)
>     at hw/core/qdev.c:295
> #6  0x00005555559834df in qdev_walk_children (dev=0x7fffbfb38010, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:610
> #7  0x000055555598763b in qbus_walk_children (bus=0x555556a40f30, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
> #8  0x00005555559834a3 in qdev_walk_children (dev=0x555556a3f220, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:602
> #9  0x000055555598763b in qbus_walk_children (bus=0x5555568296c0, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
> #10 0x00005555559829f5 in qbus_reset_all (bus=0x5555568296c0)
>     at hw/core/qdev.c:321
> #11 0x0000555555982a18 in qbus_reset_all_fn (opaque=0x5555568296c0)
>     at hw/core/qdev.c:327
> #12 0x00005555558e76f0 in qemu_devices_reset () at vl.c:1765
> #13 0x00005555558367ce in pc_machine_reset ()
>     at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181
> #14 0x00005555558e778d in qemu_system_reset (report=false) at vl.c:1778
> #15 0x00005555558ef44b in main (argc=50, argv=0x7fffffffdf48, envp=0x7fffffffe0e0) at vl.c:4656
> 
> Here's a commandline sufficient to trigger it:
> 
> qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor stdio -device nec-usb-xhci
> 
> So apparently that never got tested or something got mangled in the
> commit.  Thanks,
> 
> Alex

On the original tree (without this mangled pull), does one of
[PATCH] vhost-scsi: Update 'ioeventfd_started' with host notifiers
and
[PATCH v2] vhost: Update 'ioeventfd_started' with host notifiers

help?

-- 
MST

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
  2016-11-10 23:09       ` Michael S. Tsirkin
@ 2016-11-10 23:44         ` Alex Williamson
  2016-11-11  0:03           ` John Fastabend
  0 siblings, 1 reply; 67+ messages in thread
From: Alex Williamson @ 2016-11-10 23:44 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: qemu-devel, Peter Maydell, Stefan Hajnoczi, Cao jin,
	Paolo Bonzini, Felipe Franciosi

On Fri, 11 Nov 2016 01:09:05 +0200
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote:
> > On Fri, 11 Nov 2016 00:33:17 +0200
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >   
> > > On Thu, Nov 10, 2016 at 03:29:21PM -0700, Alex Williamson wrote:  
> > > > On Thu, 10 Nov 2016 18:12:20 +0200
> > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > >     
> > > > > The following changes since commit 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6:
> > > > > 
> > > > >   MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 +0000)
> > > > > 
> > > > > are available in the git repository at:
> > > > > 
> > > > >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> > > > > 
> > > > > for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17:
> > > > > 
> > > > >   docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 +0200)
> > > > > 
> > > > > ----------------------------------------------------------------
> > > > > virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
> > > > > 
> > > > > Lots of fixes all over the place.  I allowed some cleanups in even though they
> > > > > are not strictly bugfixes, they might prevent bugs and seem very safe.
> > > > > 
> > > > > Most importantly, this fixes a regression with vhost introduced
> > > > > by the last pull.    
> > > > 
> > > > I think I'm hitting this previous regression, I see this in my libvirt
> > > > log:
> > > > 
> > > > kvm_mem_ioeventfd_add: error adding ioeventfd: File exists
> > > > 
> > > > And a backtrace as seen here
> > > > https://paste.fedoraproject.org/477562/88144131/
> > > > 
> > > > So I merge this tag to try to resolve it, now I get qemu segfaulting
> > > > bisected to:
> > > > 
> > > > commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978
> > > > Author: Cao jin <caoj.fnst@cn.fujitsu.com>
> > > > Date:   Sat Nov 5 10:07:21 2016 +0800
> > > > 
> > > >     hcd-xhci: check & correct param before using it  
> > 
> > Here's the backtrace from this one:
> > 
> > #0  0x0000555555a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824
> > #1  0x0000555555a9f092 in xhci_port_notify (port=0x7fffbfb39330, bits=131072)
> >     at hw/usb/hcd-xhci.c:2870
> > #2  0x0000555555a9f221 in xhci_port_update (port=0x7fffbfb39330, is_detach=0)
> >     at hw/usb/hcd-xhci.c:2905
> > #3  0x0000555555a9f435 in xhci_reset (dev=0x7fffbfb38010)
> >     at hw/usb/hcd-xhci.c:2961
> > #4  0x0000555555984a2e in device_reset (dev=0x7fffbfb38010)
> >     at hw/core/qdev.c:1145
> > #5  0x00005555559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0)
> >     at hw/core/qdev.c:295
> > #6  0x00005555559834df in qdev_walk_children (dev=0x7fffbfb38010, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:610
> > #7  0x000055555598763b in qbus_walk_children (bus=0x555556a40f30, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
> > #8  0x00005555559834a3 in qdev_walk_children (dev=0x555556a3f220, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:602
> > #9  0x000055555598763b in qbus_walk_children (bus=0x5555568296c0, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
> > #10 0x00005555559829f5 in qbus_reset_all (bus=0x5555568296c0)
> >     at hw/core/qdev.c:321
> > #11 0x0000555555982a18 in qbus_reset_all_fn (opaque=0x5555568296c0)
> >     at hw/core/qdev.c:327
> > #12 0x00005555558e76f0 in qemu_devices_reset () at vl.c:1765
> > #13 0x00005555558367ce in pc_machine_reset ()
> >     at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181
> > #14 0x00005555558e778d in qemu_system_reset (report=false) at vl.c:1778
> > #15 0x00005555558ef44b in main (argc=50, argv=0x7fffffffdf48, envp=0x7fffffffe0e0) at vl.c:4656
> > 
> > Here's a commandline sufficient to trigger it:
> > 
> > qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor stdio -device nec-usb-xhci
> > 
> > So apparently that never got tested or something got mangled in the
> > commit.  Thanks,
> > 
> > Alex  
> 
> On the original tree (without this mangled pull), does one of
> [PATCH] vhost-scsi: Update 'ioeventfd_started' with host notifiers
> and
> [PATCH v2] vhost: Update 'ioeventfd_started' with host notifiers

The latter I already have from your tag as f99d0d394758, adding the
former on top of your tag does not resolve the assert (I simply removed
the xhci device from the vm config to avoid the segfault).  Thanks,

Alex

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
  2016-11-10 23:44         ` Alex Williamson
@ 2016-11-11  0:03           ` John Fastabend
  2016-11-11 21:48             ` [Qemu-devel] kvm_mem_ioeventfd_add: error adding ioeventfd: File exists (was Re: [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups) Michael S. Tsirkin
  0 siblings, 1 reply; 67+ messages in thread
From: John Fastabend @ 2016-11-11  0:03 UTC (permalink / raw)
  To: Alex Williamson, Michael S. Tsirkin
  Cc: Peter Maydell, qemu-devel, Cao jin, Stefan Hajnoczi,
	Felipe Franciosi, Paolo Bonzini

On 16-11-10 03:44 PM, Alex Williamson wrote:
> On Fri, 11 Nov 2016 01:09:05 +0200
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
>> On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote:
>>> On Fri, 11 Nov 2016 00:33:17 +0200
>>> "Michael S. Tsirkin" <mst@redhat.com> wrote:
>>>   
>>>> On Thu, Nov 10, 2016 at 03:29:21PM -0700, Alex Williamson wrote:  
>>>>> On Thu, 10 Nov 2016 18:12:20 +0200
>>>>> "Michael S. Tsirkin" <mst@redhat.com> wrote:
>>>>>     
>>>>>> The following changes since commit 6bbcb76301a72dc80c8d29af13d40bb9a759c9c6:
>>>>>>
>>>>>>   MAINTAINERS: Remove obsolete stable branches (2016-11-10 15:29:59 +0000)
>>>>>>
>>>>>> are available in the git repository at:
>>>>>>
>>>>>>   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
>>>>>>
>>>>>> for you to fetch changes up to 8038753b86f4cb1e79d4225a799395c4dae96b17:
>>>>>>
>>>>>>   docs: add PCIe devices placement guidelines (2016-11-10 18:08:06 +0200)
>>>>>>
>>>>>> ----------------------------------------------------------------
>>>>>> virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
>>>>>>
>>>>>> Lots of fixes all over the place.  I allowed some cleanups in even though they
>>>>>> are not strictly bugfixes, they might prevent bugs and seem very safe.
>>>>>>
>>>>>> Most importantly, this fixes a regression with vhost introduced
>>>>>> by the last pull.    
>>>>>
>>>>> I think I'm hitting this previous regression, I see this in my libvirt
>>>>> log:
>>>>>
>>>>> kvm_mem_ioeventfd_add: error adding ioeventfd: File exists
>>>>>
>>>>> And a backtrace as seen here
>>>>> https://paste.fedoraproject.org/477562/88144131/
>>>>>
>>>>> So I merge this tag to try to resolve it, now I get qemu segfaulting
>>>>> bisected to:
>>>>>
>>>>> commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978
>>>>> Author: Cao jin <caoj.fnst@cn.fujitsu.com>
>>>>> Date:   Sat Nov 5 10:07:21 2016 +0800
>>>>>
>>>>>     hcd-xhci: check & correct param before using it  
>>>
>>> Here's the backtrace from this one:
>>>
>>> #0  0x0000555555a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824
>>> #1  0x0000555555a9f092 in xhci_port_notify (port=0x7fffbfb39330, bits=131072)
>>>     at hw/usb/hcd-xhci.c:2870
>>> #2  0x0000555555a9f221 in xhci_port_update (port=0x7fffbfb39330, is_detach=0)
>>>     at hw/usb/hcd-xhci.c:2905
>>> #3  0x0000555555a9f435 in xhci_reset (dev=0x7fffbfb38010)
>>>     at hw/usb/hcd-xhci.c:2961
>>> #4  0x0000555555984a2e in device_reset (dev=0x7fffbfb38010)
>>>     at hw/core/qdev.c:1145
>>> #5  0x00005555559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0)
>>>     at hw/core/qdev.c:295
>>> #6  0x00005555559834df in qdev_walk_children (dev=0x7fffbfb38010, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:610
>>> #7  0x000055555598763b in qbus_walk_children (bus=0x555556a40f30, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
>>> #8  0x00005555559834a3 in qdev_walk_children (dev=0x555556a3f220, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:602
>>> #9  0x000055555598763b in qbus_walk_children (bus=0x5555568296c0, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
>>> #10 0x00005555559829f5 in qbus_reset_all (bus=0x5555568296c0)
>>>     at hw/core/qdev.c:321
>>> #11 0x0000555555982a18 in qbus_reset_all_fn (opaque=0x5555568296c0)
>>>     at hw/core/qdev.c:327
>>> #12 0x00005555558e76f0 in qemu_devices_reset () at vl.c:1765
>>> #13 0x00005555558367ce in pc_machine_reset ()
>>>     at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181
>>> #14 0x00005555558e778d in qemu_system_reset (report=false) at vl.c:1778
>>> #15 0x00005555558ef44b in main (argc=50, argv=0x7fffffffdf48, envp=0x7fffffffe0e0) at vl.c:4656
>>>
>>> Here's a commandline sufficient to trigger it:
>>>
>>> qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor stdio -device nec-usb-xhci
>>>
>>> So apparently that never got tested or something got mangled in the
>>> commit.  Thanks,
>>>
>>> Alex  
>>
>> On the original tree (without this mangled pull), does one of
>> [PATCH] vhost-scsi: Update 'ioeventfd_started' with host notifiers
>> and
>> [PATCH v2] vhost: Update 'ioeventfd_started' with host notifiers
> 
> The latter I already have from your tag as f99d0d394758, adding the
> former on top of your tag does not resolve the assert (I simply removed
> the xhci device from the vm config to avoid the segfault).  Thanks,
> 
> Alex
> 

hmm with those patches it doesn't even get past boot for me.

qemu-system-x86_64: /home/john/git/qemu/memory.c:2012:
memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed.
Aborted (core dumped)

I need to run out for a bit but can look at it more tomorrow if needed.

.John

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
  2016-11-10 22:51       ` Michael S. Tsirkin
@ 2016-11-11  2:46         ` Cao jin
  2016-11-11  3:32           ` Michael S. Tsirkin
  0 siblings, 1 reply; 67+ messages in thread
From: Cao jin @ 2016-11-11  2:46 UTC (permalink / raw)
  To: Michael S. Tsirkin, Alex Williamson
  Cc: qemu-devel, Peter Maydell, Stefan Hajnoczi, Paolo Bonzini,
	Felipe Franciosi



On 11/11/2016 06:51 AM, Michael S. Tsirkin wrote:
> On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote:

>>>> So I merge this tag to try to resolve it, now I get qemu segfaulting
>>>> bisected to:
>>>>
>>>> commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978
>>>> Author: Cao jin <caoj.fnst@cn.fujitsu.com>
>>>> Date:   Sat Nov 5 10:07:21 2016 +0800
>>>>
>>>>      hcd-xhci: check & correct param before using it
>>
>> Here's the backtrace from this one:
>>
>> #0  0x0000555555a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824
>> #1  0x0000555555a9f092 in xhci_port_notify (port=0x7fffbfb39330, bits=131072)
>>      at hw/usb/hcd-xhci.c:2870
>> #2  0x0000555555a9f221 in xhci_port_update (port=0x7fffbfb39330, is_detach=0)
>>      at hw/usb/hcd-xhci.c:2905
>> #3  0x0000555555a9f435 in xhci_reset (dev=0x7fffbfb38010)
>>      at hw/usb/hcd-xhci.c:2961
>> #4  0x0000555555984a2e in device_reset (dev=0x7fffbfb38010)
>>      at hw/core/qdev.c:1145
>> #5  0x00005555559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0)
>>      at hw/core/qdev.c:295
>> #6  0x00005555559834df in qdev_walk_children (dev=0x7fffbfb38010, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:610
>> #7  0x000055555598763b in qbus_walk_children (bus=0x555556a40f30, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
>> #8  0x00005555559834a3 in qdev_walk_children (dev=0x555556a3f220, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:602
>> #9  0x000055555598763b in qbus_walk_children (bus=0x5555568296c0, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
>> #10 0x00005555559829f5 in qbus_reset_all (bus=0x5555568296c0)
>>      at hw/core/qdev.c:321
>> #11 0x0000555555982a18 in qbus_reset_all_fn (opaque=0x5555568296c0)
>>      at hw/core/qdev.c:327
>> #12 0x00005555558e76f0 in qemu_devices_reset () at vl.c:1765
>> #13 0x00005555558367ce in pc_machine_reset ()
>>      at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181
>> #14 0x00005555558e778d in qemu_system_reset (report=false) at vl.c:1778
>> #15 0x00005555558ef44b in main (argc=50, argv=0x7fffffffdf48, envp=0x7fffffffe0e0) at vl.c:4656
>>
>> Here's a commandline sufficient to trigger it:
>>
>> qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor stdio -device nec-usb-xhci
>>
>> So apparently that never got tested or something got mangled in the
>> commit.  Thanks,
>>
>> Alex
>
> I think I'll drop this, this patchset was borderline useful anyway.
>

Really sorry for the issue, I moved usb_xhci_init() too far from its 
original place, results in the segmentation fault.

Could I send new version of this patch to fix it right now?

-- 
Yours Sincerely,

Cao jin

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
  2016-11-11  2:46         ` Cao jin
@ 2016-11-11  3:32           ` Michael S. Tsirkin
  2016-11-11  3:51             ` Cao jin
  0 siblings, 1 reply; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-11  3:32 UTC (permalink / raw)
  To: Cao jin
  Cc: Alex Williamson, qemu-devel, Peter Maydell, Stefan Hajnoczi,
	Paolo Bonzini, Felipe Franciosi

On Fri, Nov 11, 2016 at 10:46:29AM +0800, Cao jin wrote:
> 
> 
> On 11/11/2016 06:51 AM, Michael S. Tsirkin wrote:
> > On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote:
> 
> > > > > So I merge this tag to try to resolve it, now I get qemu segfaulting
> > > > > bisected to:
> > > > > 
> > > > > commit a6d8372bc6764ee279b473d13ff4ecc8acb7a978
> > > > > Author: Cao jin <caoj.fnst@cn.fujitsu.com>
> > > > > Date:   Sat Nov 5 10:07:21 2016 +0800
> > > > > 
> > > > >      hcd-xhci: check & correct param before using it
> > > 
> > > Here's the backtrace from this one:
> > > 
> > > #0  0x0000555555a99d90 in xhci_running (xhci=0x0) at hw/usb/hcd-xhci.c:824
> > > #1  0x0000555555a9f092 in xhci_port_notify (port=0x7fffbfb39330, bits=131072)
> > >      at hw/usb/hcd-xhci.c:2870
> > > #2  0x0000555555a9f221 in xhci_port_update (port=0x7fffbfb39330, is_detach=0)
> > >      at hw/usb/hcd-xhci.c:2905
> > > #3  0x0000555555a9f435 in xhci_reset (dev=0x7fffbfb38010)
> > >      at hw/usb/hcd-xhci.c:2961
> > > #4  0x0000555555984a2e in device_reset (dev=0x7fffbfb38010)
> > >      at hw/core/qdev.c:1145
> > > #5  0x00005555559828db in qdev_reset_one (dev=0x7fffbfb38010, opaque=0x0)
> > >      at hw/core/qdev.c:295
> > > #6  0x00005555559834df in qdev_walk_children (dev=0x7fffbfb38010, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:610
> > > #7  0x000055555598763b in qbus_walk_children (bus=0x555556a40f30, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
> > > #8  0x00005555559834a3 in qdev_walk_children (dev=0x555556a3f220, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/qdev.c:602
> > > #9  0x000055555598763b in qbus_walk_children (bus=0x5555568296c0, pre_devfn=0x0, pre_busfn=0x0, post_devfn=0x5555559828bf <qdev_reset_one>, post_busfn=0x5555559828e2 <qbus_reset_one>, opaque=0x0) at hw/core/bus.c:59
> > > #10 0x00005555559829f5 in qbus_reset_all (bus=0x5555568296c0)
> > >      at hw/core/qdev.c:321
> > > #11 0x0000555555982a18 in qbus_reset_all_fn (opaque=0x5555568296c0)
> > >      at hw/core/qdev.c:327
> > > #12 0x00005555558e76f0 in qemu_devices_reset () at vl.c:1765
> > > #13 0x00005555558367ce in pc_machine_reset ()
> > >      at /net/gimli/home/alwillia/Work/qemu.git/hw/i386/pc.c:2181
> > > #14 0x00005555558e778d in qemu_system_reset (report=false) at vl.c:1778
> > > #15 0x00005555558ef44b in main (argc=50, argv=0x7fffffffdf48, envp=0x7fffffffe0e0) at vl.c:4656
> > > 
> > > Here's a commandline sufficient to trigger it:
> > > 
> > > qemu-system-x86_64 -m 1G -nodefaults -no-user-config -display none -monitor stdio -device nec-usb-xhci
> > > 
> > > So apparently that never got tested or something got mangled in the
> > > commit.  Thanks,
> > > 
> > > Alex
> > 
> > I think I'll drop this, this patchset was borderline useful anyway.
> > 
> 
> Really sorry for the issue, I moved usb_xhci_init() too far from its
> original place, results in the segmentation fault.
> 
> Could I send new version of this patch to fix it right now?
> 
> -- 
> Yours Sincerely,
> 
> Cao jin
> 

OK but I'd like to know which other patches
in the patchset are untested. You really must inform people
when you post untested patches.

-- 
MST

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
  2016-11-11  3:51             ` Cao jin
@ 2016-11-11  3:49               ` Michael S. Tsirkin
  0 siblings, 0 replies; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-11  3:49 UTC (permalink / raw)
  To: Cao jin
  Cc: Alex Williamson, qemu-devel, Peter Maydell, Stefan Hajnoczi,
	Paolo Bonzini, Felipe Franciosi

On Fri, Nov 11, 2016 at 11:51:34AM +0800, Cao jin wrote:
> 
> 
> On 11/11/2016 11:32 AM, Michael S. Tsirkin wrote:
> > On Fri, Nov 11, 2016 at 10:46:29AM +0800, Cao jin wrote:
> > > 
> > > 
> > > On 11/11/2016 06:51 AM, Michael S. Tsirkin wrote:
> > > > On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote:
> > > 
> 
> > > > 
> > > > I think I'll drop this, this patchset was borderline useful anyway.
> > > > 
> > > 
> > > Really sorry for the issue, I moved usb_xhci_init() too far from its
> > > original place, results in the segmentation fault.
> > > 
> > > Could I send new version of this patch to fix it right now?
> > > 
> 
> > > 
> > 
> > OK but I'd like to know which other patches
> > in the patchset are untested. You really must inform people
> > when you post untested patches.
> > 
> 
> I see, really sorry for my mistake. My patches just pass make check, didn't
> test for each patch.
> -- 
> Yours Sincerely,
> 
> Cao jin
> 

I think it's best to drop them for now in this case.


-- 
MST

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups
  2016-11-11  3:32           ` Michael S. Tsirkin
@ 2016-11-11  3:51             ` Cao jin
  2016-11-11  3:49               ` Michael S. Tsirkin
  0 siblings, 1 reply; 67+ messages in thread
From: Cao jin @ 2016-11-11  3:51 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Alex Williamson, qemu-devel, Peter Maydell, Stefan Hajnoczi,
	Paolo Bonzini, Felipe Franciosi



On 11/11/2016 11:32 AM, Michael S. Tsirkin wrote:
> On Fri, Nov 11, 2016 at 10:46:29AM +0800, Cao jin wrote:
>>
>>
>> On 11/11/2016 06:51 AM, Michael S. Tsirkin wrote:
>>> On Thu, Nov 10, 2016 at 03:48:28PM -0700, Alex Williamson wrote:
>>

>>>
>>> I think I'll drop this, this patchset was borderline useful anyway.
>>>
>>
>> Really sorry for the issue, I moved usb_xhci_init() too far from its
>> original place, results in the segmentation fault.
>>
>> Could I send new version of this patch to fix it right now?
>>

>>
>
> OK but I'd like to know which other patches
> in the patchset are untested. You really must inform people
> when you post untested patches.
>

I see, really sorry for my mistake. My patches just pass make check, 
didn't test for each patch.
-- 
Yours Sincerely,

Cao jin

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 36/47] pci: Convert msix_init() to Error and fix callers to check it
  2016-11-10 16:14 ` [Qemu-devel] [PULL 36/47] pci: Convert msix_init() to Error and fix callers to check it Michael S. Tsirkin
@ 2016-11-11  6:55   ` Hannes Reinecke
  0 siblings, 0 replies; 67+ messages in thread
From: Hannes Reinecke @ 2016-11-11  6:55 UTC (permalink / raw)
  To: Michael S. Tsirkin, qemu-devel
  Cc: Peter Maydell, Cao jin, Jiri Pirko, Gerd Hoffmann,
	Dmitry Fleytman, Jason Wang, Paolo Bonzini, Alex Williamson,
	Markus Armbruster, Marcel Apfelbaum, Keith Busch, Kevin Wolf,
	Max Reitz, qemu-block

On 11/10/2016 05:14 PM, Michael S. Tsirkin wrote:
> From: Cao jin <caoj.fnst@cn.fujitsu.com>
> 
> msix_init() reports errors with error_report(), which is wrong when
> it's used in realize().  The same issue was fixed for msi_init() in
> commit 1108b2f.
> 
> For some devices(like e1000e, vmxnet3) who won't fail because of
> msix_init's failure, suppress the error report by passing NULL error object.
> 
> Bonus: add comment for msix_init.
> 
> CC: Jiri Pirko <jiri@resnulli.us>
> CC: Gerd Hoffmann <kraxel@redhat.com>
> CC: Dmitry Fleytman <dmitry@daynix.com>
> CC: Jason Wang <jasowang@redhat.com>
> CC: Michael S. Tsirkin <mst@redhat.com>
> CC: Hannes Reinecke <hare@suse.de>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Alex Williamson <alex.williamson@redhat.com>
> CC: Markus Armbruster <armbru@redhat.com>
> CC: Marcel Apfelbaum <marcel@redhat.com>
> 
> Reviewed-by: Markus Armbruster <armbru@redhat.com>
> Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
> Acked-by: Marcel Apfelbaum <marcel@redhat.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  include/hw/pci/msix.h  |  5 +++--
>  hw/block/nvme.c        |  5 ++++-
>  hw/misc/ivshmem.c      |  8 ++++----
>  hw/net/e1000e.c        |  6 +++++-
>  hw/net/rocker/rocker.c |  7 ++++++-
>  hw/net/vmxnet3.c       |  8 ++++++--
>  hw/pci/msix.c          | 34 +++++++++++++++++++++++++++++-----
>  hw/scsi/megasas.c      |  5 ++++-
>  hw/usb/hcd-xhci.c      | 13 ++++++++-----
>  hw/vfio/pci.c          |  8 ++++++--
>  hw/virtio/virtio-pci.c | 11 +++++------
>  11 files changed, 80 insertions(+), 30 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.com>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 37/47] megasas: change behaviour of msix switch
  2016-11-10 16:14 ` [Qemu-devel] [PULL 37/47] megasas: change behaviour of msix switch Michael S. Tsirkin
@ 2016-11-11  6:56   ` Hannes Reinecke
  0 siblings, 0 replies; 67+ messages in thread
From: Hannes Reinecke @ 2016-11-11  6:56 UTC (permalink / raw)
  To: Michael S. Tsirkin, qemu-devel
  Cc: Peter Maydell, Cao jin, Paolo Bonzini, Markus Armbruster,
	Marcel Apfelbaum, qemu-block

On 11/10/2016 05:14 PM, Michael S. Tsirkin wrote:
> From: Cao jin <caoj.fnst@cn.fujitsu.com>
> 
> Resolve the TODO, msix=auto means msix on; if user specify msix=on,
> then device creation fail on msix_init failure.
> Also undo the overwrites of user configuration of msix.
> 
> CC: Michael S. Tsirkin <mst@redhat.com>
> CC: Hannes Reinecke <hare@suse.de>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Markus Armbruster <armbru@redhat.com>
> CC: Marcel Apfelbaum <marcel@redhat.com>
> 
> Reviewed-by: Markus Armbruster <armbru@redhat.com>
> Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
> Acked-by: Marcel Apfelbaum <marcel@redhat.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/scsi/megasas.c | 28 ++++++++++++++++++++--------
>  1 file changed, 20 insertions(+), 8 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.com>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 39/47] megasas: remove unnecessary megasas_use_msix()
  2016-11-10 16:14 ` [Qemu-devel] [PULL 39/47] megasas: remove unnecessary megasas_use_msix() Michael S. Tsirkin
@ 2016-11-11  6:57   ` Hannes Reinecke
  0 siblings, 0 replies; 67+ messages in thread
From: Hannes Reinecke @ 2016-11-11  6:57 UTC (permalink / raw)
  To: Michael S. Tsirkin, qemu-devel
  Cc: Peter Maydell, Cao jin, Paolo Bonzini, Markus Armbruster,
	Marcel Apfelbaum, qemu-block

On 11/10/2016 05:14 PM, Michael S. Tsirkin wrote:
> From: Cao jin <caoj.fnst@cn.fujitsu.com>
> 
> Also move certain hunk above, to place msix init related code together.
> 
> CC: Hannes Reinecke <hare@suse.de>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Markus Armbruster <armbru@redhat.com>
> CC: Marcel Apfelbaum <marcel@redhat.com>
> CC: Michael S. Tsirkin <mst@redhat.com>
> 
> Reviewed-by: Markus Armbruster <armbru@redhat.com>
> Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
> Acked-by: Marcel Apfelbaum <marcel@redhat.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/scsi/megasas.c | 19 ++++++-------------
>  1 file changed, 6 insertions(+), 13 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.com>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] [PULL 40/47] megasas: undo the overwrites of msi user configuration
  2016-11-10 16:14 ` [Qemu-devel] [PULL 40/47] megasas: undo the overwrites of msi user configuration Michael S. Tsirkin
@ 2016-11-11  6:57   ` Hannes Reinecke
  0 siblings, 0 replies; 67+ messages in thread
From: Hannes Reinecke @ 2016-11-11  6:57 UTC (permalink / raw)
  To: Michael S. Tsirkin, qemu-devel
  Cc: Peter Maydell, Cao jin, Paolo Bonzini, Markus Armbruster,
	Marcel Apfelbaum, qemu-block

On 11/10/2016 05:14 PM, Michael S. Tsirkin wrote:
> From: Cao jin <caoj.fnst@cn.fujitsu.com>
> 
> Commit afea4e14 seems forgetting to undo the overwrites, which is
> unsuitable.
> 
> CC: Hannes Reinecke <hare@suse.de>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Markus Armbruster <armbru@redhat.com>
> CC: Marcel Apfelbaum <marcel@redhat.com>
> CC: Michael S. Tsirkin <mst@redhat.com>
> 
> Reviewed-by: Markus Armbruster <armbru@redhat.com>
> Signed-off-by: Cao jin <caoj.fnst@cn.fujitsu.com>
> Acked-by: Marcel Apfelbaum <marcel@redhat.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/scsi/megasas.c | 7 +++----
>  1 file changed, 3 insertions(+), 4 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.com>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 67+ messages in thread

* [Qemu-devel] kvm_mem_ioeventfd_add: error adding ioeventfd: File exists (was Re: [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups)
  2016-11-11  0:03           ` John Fastabend
@ 2016-11-11 21:48             ` Michael S. Tsirkin
  2016-11-11 22:43               ` Alex Williamson
  0 siblings, 1 reply; 67+ messages in thread
From: Michael S. Tsirkin @ 2016-11-11 21:48 UTC (permalink / raw)
  To: John Fastabend
  Cc: Alex Williamson, Peter Maydell, qemu-devel, Cao jin,
	Stefan Hajnoczi, Felipe Franciosi, Paolo Bonzini

Paolo tells me he has fixes for the kvm ioeventfd crashes
in vhost.  I would appreciate testing and reports.


-- 
MST

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Qemu-devel] kvm_mem_ioeventfd_add: error adding ioeventfd: File exists (was Re: [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups)
  2016-11-11 21:48             ` [Qemu-devel] kvm_mem_ioeventfd_add: error adding ioeventfd: File exists (was Re: [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups) Michael S. Tsirkin
@ 2016-11-11 22:43               ` Alex Williamson
  0 siblings, 0 replies; 67+ messages in thread
From: Alex Williamson @ 2016-11-11 22:43 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: John Fastabend, Peter Maydell, qemu-devel, Cao jin,
	Stefan Hajnoczi, Felipe Franciosi, Paolo Bonzini

On Fri, 11 Nov 2016 23:48:58 +0200
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> Paolo tells me he has fixes for the kvm ioeventfd crashes
> in vhost.  I would appreciate testing and reports.

I replied to Paolo's patch, it did not fix the problem for me.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 67+ messages in thread

end of thread, other threads:[~2016-11-11 22:43 UTC | newest]

Thread overview: 67+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-10 16:12 [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Michael S. Tsirkin
2016-11-10 16:12 ` [Qemu-devel] [PULL 05/47] contrib: add libvhost-user Michael S. Tsirkin
2016-11-10 17:05   ` Daniel P. Berrange
2016-11-10 17:20     ` Michael S. Tsirkin
2016-11-10 16:12 ` [Qemu-devel] [PULL 01/47] tests/vhost-user-bridge: remove false comment Michael S. Tsirkin
2016-11-10 16:12 ` [Qemu-devel] [PULL 02/47] tests/vhost-user-bridge: remove unnecessary dispatcher_remove Michael S. Tsirkin
2016-11-10 16:12 ` [Qemu-devel] [PULL 03/47] tests/vhost-user-bridge: indicate peer disconnected Michael S. Tsirkin
2016-11-10 16:12 ` [Qemu-devel] [PULL 04/47] tests/vhost-user-bridge: do not accept more than one connection Michael S. Tsirkin
2016-11-10 16:12 ` [Qemu-devel] [PULL 06/47] tests/vhost-user-bridge: use contrib/libvhost-user Michael S. Tsirkin
2016-11-10 16:12 ` [Qemu-devel] [PULL 07/47] intel_iommu: fixing source id during IOTLB hash key calculation Michael S. Tsirkin
2016-11-10 16:12 ` [Qemu-devel] [PULL 08/47] virtio: rename virtqueue_discard to virtqueue_unpop Michael S. Tsirkin
2016-11-10 16:12 ` [Qemu-devel] [PULL 09/47] virtio: make virtqueue_alloc_element static Michael S. Tsirkin
2016-11-10 16:12 ` [Qemu-devel] [PULL 10/47] pcie_aer: Convert pcie_aer_init to Error Michael S. Tsirkin
2016-11-10 16:12 ` [Qemu-devel] [PULL 11/47] virtio-crypto: tag as not hotpluggable and migration Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 12/47] intel_iommu: fix several incorrect endianess and bit fields Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 13/47] intel_iommu: fix incorrect assert Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 14/47] acpi: fix DMAR device scope for IOAPIC Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 15/47] virtio: allow per-device-class legacy features Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 16/47] virtio-net: mark VIRTIO_NET_F_GSO as legacy Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 17/47] vhost: Update 'ioeventfd_started' with host notifiers Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 18/47] vhost: Use vbus var instead of VIRTIO_BUS() macro Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 19/47] vhost: migration blocker only if shared log is used Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 20/47] qdev: hotplug: drop HotplugHandler.post_plug callback Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 21/47] nvdimm acpi: drop the lock of fit buffer Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 22/47] pc: memhp: move nvdimm hotplug out of memory hotplug Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 23/47] pc: memhp: stop handling nvdimm hotplug in pc_dimm_unplug Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 24/47] nvdimm acpi: clean up nvdimm_build_acpi Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 25/47] docs: improve the doc of Read FIT method Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 26/47] nvdimm acpi: rename nvdimm_plugged_device_list Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 27/47] nvdimm acpi: cleanup nvdimm_build_fit Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 28/47] nvdimm acpi: rename nvdimm_acpi_hotplug Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 29/47] nvdimm acpi: define DSM return codes Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 30/47] nvdimm acpi: fix two comments Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 31/47] nvdimm acpi: rename nvdimm_dsm_reserved_root Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 32/47] nvdimm acpi: use aml_name_decl to define named object Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 33/47] nvdimm acpi: introduce NVDIMM_DSM_MEMORY_SIZE Michael S. Tsirkin
2016-11-10 16:13 ` [Qemu-devel] [PULL 34/47] msix: Follow CODING_STYLE Michael S. Tsirkin
2016-11-10 16:14 ` [Qemu-devel] [PULL 35/47] hcd-xhci: check & correct param before using it Michael S. Tsirkin
2016-11-10 16:14 ` [Qemu-devel] [PULL 36/47] pci: Convert msix_init() to Error and fix callers to check it Michael S. Tsirkin
2016-11-11  6:55   ` Hannes Reinecke
2016-11-10 16:14 ` [Qemu-devel] [PULL 37/47] megasas: change behaviour of msix switch Michael S. Tsirkin
2016-11-11  6:56   ` Hannes Reinecke
2016-11-10 16:14 ` [Qemu-devel] [PULL 38/47] hcd-xhci: " Michael S. Tsirkin
2016-11-10 16:14 ` [Qemu-devel] [PULL 39/47] megasas: remove unnecessary megasas_use_msix() Michael S. Tsirkin
2016-11-11  6:57   ` Hannes Reinecke
2016-11-10 16:14 ` [Qemu-devel] [PULL 40/47] megasas: undo the overwrites of msi user configuration Michael S. Tsirkin
2016-11-11  6:57   ` Hannes Reinecke
2016-11-10 16:14 ` [Qemu-devel] [PULL 41/47] vmxnet3: fix reference leak issue Michael S. Tsirkin
2016-11-10 16:14 ` [Qemu-devel] [PULL 42/47] vmxnet3: remove unnecessary internal msix flag Michael S. Tsirkin
2016-11-10 16:14 ` [Qemu-devel] [PULL 43/47] msi_init: convert assert to return -errno Michael S. Tsirkin
2016-11-10 16:14 ` [Qemu-devel] [PULL 44/47] vhost: adapt vhost_verify_ring_mappings() to virtio 1 ring layout Michael S. Tsirkin
2016-11-10 16:14 ` [Qemu-devel] [PULL 45/47] vhost: drop legacy vring layout bits Michael S. Tsirkin
2016-11-10 16:14 ` [Qemu-devel] [PULL 46/47] virtio: drop virtio_queue_get_ring_{size, addr}() Michael S. Tsirkin
2016-11-10 16:14 ` [Qemu-devel] [PULL 47/47] docs: add PCIe devices placement guidelines Michael S. Tsirkin
2016-11-10 22:29 ` [Qemu-devel] [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups Alex Williamson
2016-11-10 22:33   ` Michael S. Tsirkin
2016-11-10 22:48     ` Alex Williamson
2016-11-10 22:51       ` Michael S. Tsirkin
2016-11-11  2:46         ` Cao jin
2016-11-11  3:32           ` Michael S. Tsirkin
2016-11-11  3:51             ` Cao jin
2016-11-11  3:49               ` Michael S. Tsirkin
2016-11-10 23:09       ` Michael S. Tsirkin
2016-11-10 23:44         ` Alex Williamson
2016-11-11  0:03           ` John Fastabend
2016-11-11 21:48             ` [Qemu-devel] kvm_mem_ioeventfd_add: error adding ioeventfd: File exists (was Re: [PULL 00/47] virtio, vhost, pc, pci: tests, documentation, fixes and cleanups) Michael S. Tsirkin
2016-11-11 22:43               ` Alex Williamson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.