On Fri, Nov 20, 2020 at 07:50:45PM +0100, Eugenio Pérez wrote: > Signed-off-by: Eugenio Pérez > --- > hw/virtio/vhost-sw-lm-ring.h | 26 +++++++++ > include/hw/virtio/vhost.h | 3 ++ > hw/virtio/vhost-sw-lm-ring.c | 60 +++++++++++++++++++++ > hw/virtio/vhost.c | 100 +++++++++++++++++++++++++++++++++-- > hw/virtio/meson.build | 2 +- > 5 files changed, 187 insertions(+), 4 deletions(-) > create mode 100644 hw/virtio/vhost-sw-lm-ring.h > create mode 100644 hw/virtio/vhost-sw-lm-ring.c > > diff --git a/hw/virtio/vhost-sw-lm-ring.h b/hw/virtio/vhost-sw-lm-ring.h > new file mode 100644 > index 0000000000..86dc081b93 > --- /dev/null > +++ b/hw/virtio/vhost-sw-lm-ring.h > @@ -0,0 +1,26 @@ > +/* > + * vhost software live migration ring > + * > + * SPDX-FileCopyrightText: Red Hat, Inc. 2020 > + * SPDX-FileContributor: Author: Eugenio Pérez > + * > + * SPDX-License-Identifier: GPL-2.0-or-later > + */ > + > +#ifndef VHOST_SW_LM_RING_H > +#define VHOST_SW_LM_RING_H > + > +#include "qemu/osdep.h" > + > +#include "hw/virtio/virtio.h" > +#include "hw/virtio/vhost.h" > + > +typedef struct VhostShadowVirtqueue VhostShadowVirtqueue; Here it's called a shadow virtqueue while the file calls it a sw-lm-ring. Please use a single name. > + > +bool vhost_vring_kick(VhostShadowVirtqueue *vq); vhost_shadow_vq_kick()? > + > +VhostShadowVirtqueue *vhost_sw_lm_shadow_vq(struct vhost_dev *dev, int idx); vhost_dev_get_shadow_vq()? This could be in include/hw/virtio/vhost.h with the other vhost_dev_*() functions. > + > +void vhost_sw_lm_shadow_vq_free(VhostShadowVirtqueue *vq); Hmm...now I wonder what the lifecycle is. Does vhost_sw_lm_shadow_vq() allocate it? Please add doc comments explaining these functions either in this header file or in vhost-sw-lm-ring.c. > + > +#endif > diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h > index b5b7496537..93cc3f1ae3 100644 > --- a/include/hw/virtio/vhost.h > +++ b/include/hw/virtio/vhost.h > @@ -54,6 +54,8 @@ struct vhost_iommu { > QLIST_ENTRY(vhost_iommu) iommu_next; > }; > > +typedef struct VhostShadowVirtqueue VhostShadowVirtqueue; > + > typedef struct VhostDevConfigOps { > /* Vhost device config space changed callback > */ > @@ -83,6 +85,7 @@ struct vhost_dev { > bool started; > bool log_enabled; > uint64_t log_size; > + VhostShadowVirtqueue *sw_lm_shadow_vq[2]; The hardcoded 2 is probably for single-queue virtio-net? I guess this will eventually become VhostShadowVirtqueue *shadow_vqs or VhostShadowVirtqueue **shadow_vqs, depending on whether each one should be allocated individually. > VirtIOHandleOutput sw_lm_vq_handler; > Error *migration_blocker; > const VhostOps *vhost_ops; > diff --git a/hw/virtio/vhost-sw-lm-ring.c b/hw/virtio/vhost-sw-lm-ring.c > new file mode 100644 > index 0000000000..0192e77831 > --- /dev/null > +++ b/hw/virtio/vhost-sw-lm-ring.c > @@ -0,0 +1,60 @@ > +/* > + * vhost software live migration ring > + * > + * SPDX-FileCopyrightText: Red Hat, Inc. 2020 > + * SPDX-FileContributor: Author: Eugenio Pérez > + * > + * SPDX-License-Identifier: GPL-2.0-or-later > + */ > + > +#include "hw/virtio/vhost-sw-lm-ring.h" > +#include "hw/virtio/vhost.h" > + > +#include "standard-headers/linux/vhost_types.h" > +#include "standard-headers/linux/virtio_ring.h" > + > +#include "qemu/event_notifier.h" > + > +typedef struct VhostShadowVirtqueue { > + EventNotifier hdev_notifier; > + VirtQueue *vq; > +} VhostShadowVirtqueue; > + > +static inline bool vhost_vring_should_kick(VhostShadowVirtqueue *vq) > +{ > + return virtio_queue_get_used_notify_split(vq->vq); > +} > + > +bool vhost_vring_kick(VhostShadowVirtqueue *vq) > +{ > + return vhost_vring_should_kick(vq) ? event_notifier_set(&vq->hdev_notifier) > + : true; > +} How is the return value used? event_notifier_set() returns -errno so this function returns false on success, and true when notifications are disabled or event_notifier_set() failed. I'm not sure this return value can be used for anything. > + > +VhostShadowVirtqueue *vhost_sw_lm_shadow_vq(struct vhost_dev *dev, int idx) I see now that this function allocates the VhostShadowVirtqueue. Maybe adding _new() to the name would make that clear? > +{ > + struct vhost_vring_file file = { > + .index = idx > + }; > + VirtQueue *vq = virtio_get_queue(dev->vdev, idx); > + VhostShadowVirtqueue *svq; > + int r; > + > + svq = g_new0(VhostShadowVirtqueue, 1); > + svq->vq = vq; > + > + r = event_notifier_init(&svq->hdev_notifier, 0); > + assert(r == 0); > + > + file.fd = event_notifier_get_fd(&svq->hdev_notifier); > + r = dev->vhost_ops->vhost_set_vring_kick(dev, &file); > + assert(r == 0); > + > + return svq; > +} I guess there are assumptions about the status of the device? Does the virtqueue need to be disabled when this function is called? > + > +void vhost_sw_lm_shadow_vq_free(VhostShadowVirtqueue *vq) > +{ > + event_notifier_cleanup(&vq->hdev_notifier); > + g_free(vq); > +} > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c > index 9cbd52a7f1..a55b684b5f 100644 > --- a/hw/virtio/vhost.c > +++ b/hw/virtio/vhost.c > @@ -13,6 +13,8 @@ > * GNU GPL, version 2 or (at your option) any later version. > */ > > +#include "hw/virtio/vhost-sw-lm-ring.h" > + > #include "qemu/osdep.h" > #include "qapi/error.h" > #include "hw/virtio/vhost.h" > @@ -61,6 +63,20 @@ bool vhost_has_free_slot(void) > return slots_limit > used_memslots; > } > > +static struct vhost_dev *vhost_dev_from_virtio(const VirtIODevice *vdev) > +{ > + struct vhost_dev *hdev; > + > + QLIST_FOREACH(hdev, &vhost_devices, entry) { > + if (hdev->vdev == vdev) { > + return hdev; > + } > + } > + > + assert(hdev); > + return NULL; > +} > + > static bool vhost_dev_can_log(const struct vhost_dev *hdev) > { > return hdev->features & (0x1ULL << VHOST_F_LOG_ALL); > @@ -148,6 +164,12 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, > return 0; > } > > +static void vhost_log_sync_nop(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + return; > +} > + > static void vhost_log_sync(MemoryListener *listener, > MemoryRegionSection *section) > { > @@ -928,6 +950,71 @@ static void vhost_log_global_stop(MemoryListener *listener) > } > } > > +static void handle_sw_lm_vq(VirtIODevice *vdev, VirtQueue *vq) > +{ > + struct vhost_dev *hdev = vhost_dev_from_virtio(vdev); If this lookup becomes a performance bottleneck there are other options for determining the vhost_dev. For example VirtIODevice could have a field for stashing the vhost_dev pointer. > + uint16_t idx = virtio_get_queue_index(vq); > + > + VhostShadowVirtqueue *svq = hdev->sw_lm_shadow_vq[idx]; > + > + vhost_vring_kick(svq); > +} I'm a confused. Do we need to pop elements from vq and push equivalent elements onto svq before kicking? Either a todo comment is missing or I misunderstand how this works. > + > +static int vhost_sw_live_migration_stop(struct vhost_dev *dev) > +{ > + int idx; > + > + vhost_dev_enable_notifiers(dev, dev->vdev); > + for (idx = 0; idx < dev->nvqs; ++idx) { > + vhost_sw_lm_shadow_vq_free(dev->sw_lm_shadow_vq[idx]); > + } > + > + return 0; > +} > + > +static int vhost_sw_live_migration_start(struct vhost_dev *dev) > +{ > + int idx; > + > + for (idx = 0; idx < dev->nvqs; ++idx) { > + dev->sw_lm_shadow_vq[idx] = vhost_sw_lm_shadow_vq(dev, idx); > + } > + > + vhost_dev_disable_notifiers(dev, dev->vdev); There is a race condition if the guest kicks the vq while this is happening. The shadow vq hdev_notifier needs to be set so the vhost device checks the virtqueue for requests that slipped in during the race window. > + > + return 0; > +} > + > +static int vhost_sw_live_migration_enable(struct vhost_dev *dev, > + bool enable_lm) > +{ > + if (enable_lm) { > + return vhost_sw_live_migration_start(dev); > + } else { > + return vhost_sw_live_migration_stop(dev); > + } > +} > + > +static void vhost_sw_lm_global_start(MemoryListener *listener) > +{ > + int r; > + > + r = vhost_migration_log(listener, true, vhost_sw_live_migration_enable); > + if (r < 0) { > + abort(); > + } > +} > + > +static void vhost_sw_lm_global_stop(MemoryListener *listener) > +{ > + int r; > + > + r = vhost_migration_log(listener, false, vhost_sw_live_migration_enable); > + if (r < 0) { > + abort(); > + } > +} > + > static void vhost_log_start(MemoryListener *listener, > MemoryRegionSection *section, > int old, int new) > @@ -1334,9 +1421,14 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, > .region_nop = vhost_region_addnop, > .log_start = vhost_log_start, > .log_stop = vhost_log_stop, > - .log_sync = vhost_log_sync, > - .log_global_start = vhost_log_global_start, > - .log_global_stop = vhost_log_global_stop, > + .log_sync = !vhost_dev_can_log(hdev) ? > + vhost_log_sync_nop : > + vhost_log_sync, Why is this change necessary now? It's not clear to me why it was previously okay to call vhost_log_sync(). > + .log_global_start = !vhost_dev_can_log(hdev) ? > + vhost_sw_lm_global_start : > + vhost_log_global_start, > + .log_global_stop = !vhost_dev_can_log(hdev) ? vhost_sw_lm_global_stop : > + vhost_log_global_stop, > .eventfd_add = vhost_eventfd_add, > .eventfd_del = vhost_eventfd_del, > .priority = 10 > @@ -1364,6 +1456,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, > error_free(hdev->migration_blocker); > goto fail_busyloop; > } > + } else { > + hdev->sw_lm_vq_handler = handle_sw_lm_vq; > } > > hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions)); > diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build > index fbff9bc9d4..17419cb13e 100644 > --- a/hw/virtio/meson.build > +++ b/hw/virtio/meson.build > @@ -11,7 +11,7 @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) > > virtio_ss = ss.source_set() > virtio_ss.add(files('virtio.c')) > -virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c')) > +virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-sw-lm-ring.c')) > virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c')) > virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c')) > virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) > -- > 2.18.4 >