On Fri, Nov 20, 2020 at 07:50:45PM +0100, Eugenio Pérez wrote:
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>  hw/virtio/vhost-sw-lm-ring.h |  26 +++++++++
>  include/hw/virtio/vhost.h    |   3 ++
>  hw/virtio/vhost-sw-lm-ring.c |  60 +++++++++++++++++++++
>  hw/virtio/vhost.c            | 100 +++++++++++++++++++++++++++++++++--
>  hw/virtio/meson.build        |   2 +-
>  5 files changed, 187 insertions(+), 4 deletions(-)
>  create mode 100644 hw/virtio/vhost-sw-lm-ring.h
>  create mode 100644 hw/virtio/vhost-sw-lm-ring.c
> 
> diff --git a/hw/virtio/vhost-sw-lm-ring.h b/hw/virtio/vhost-sw-lm-ring.h
> new file mode 100644
> index 0000000000..86dc081b93
> --- /dev/null
> +++ b/hw/virtio/vhost-sw-lm-ring.h
> @@ -0,0 +1,26 @@
> +/*
> + * vhost software live migration ring
> + *
> + * SPDX-FileCopyrightText: Red Hat, Inc. 2020
> + * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef VHOST_SW_LM_RING_H
> +#define VHOST_SW_LM_RING_H
> +
> +#include "qemu/osdep.h"
> +
> +#include "hw/virtio/virtio.h"
> +#include "hw/virtio/vhost.h"
> +
> +typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;

Here it's called a shadow virtqueue while the file calls it a
sw-lm-ring. Please use a single name.

> +
> +bool vhost_vring_kick(VhostShadowVirtqueue *vq);

vhost_shadow_vq_kick()?

> +
> +VhostShadowVirtqueue *vhost_sw_lm_shadow_vq(struct vhost_dev *dev, int idx);

vhost_dev_get_shadow_vq()? This could be in include/hw/virtio/vhost.h
with the other vhost_dev_*() functions.

> +
> +void vhost_sw_lm_shadow_vq_free(VhostShadowVirtqueue *vq);

Hmm...now I wonder what the lifecycle is. Does vhost_sw_lm_shadow_vq()
allocate it?

Please add doc comments explaining these functions either in this header
file or in vhost-sw-lm-ring.c.

> +
> +#endif
> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> index b5b7496537..93cc3f1ae3 100644
> --- a/include/hw/virtio/vhost.h
> +++ b/include/hw/virtio/vhost.h
> @@ -54,6 +54,8 @@ struct vhost_iommu {
>      QLIST_ENTRY(vhost_iommu) iommu_next;
>  };
>  
> +typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
> +
>  typedef struct VhostDevConfigOps {
>      /* Vhost device config space changed callback
>       */
> @@ -83,6 +85,7 @@ struct vhost_dev {
>      bool started;
>      bool log_enabled;
>      uint64_t log_size;
> +    VhostShadowVirtqueue *sw_lm_shadow_vq[2];

The hardcoded 2 is probably for single-queue virtio-net? I guess this
will eventually become VhostShadowVirtqueue *shadow_vqs or
VhostShadowVirtqueue **shadow_vqs, depending on whether each one should
be allocated individually.

>      VirtIOHandleOutput sw_lm_vq_handler;
>      Error *migration_blocker;
>      const VhostOps *vhost_ops;
> diff --git a/hw/virtio/vhost-sw-lm-ring.c b/hw/virtio/vhost-sw-lm-ring.c
> new file mode 100644
> index 0000000000..0192e77831
> --- /dev/null
> +++ b/hw/virtio/vhost-sw-lm-ring.c
> @@ -0,0 +1,60 @@
> +/*
> + * vhost software live migration ring
> + *
> + * SPDX-FileCopyrightText: Red Hat, Inc. 2020
> + * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include "hw/virtio/vhost-sw-lm-ring.h"
> +#include "hw/virtio/vhost.h"
> +
> +#include "standard-headers/linux/vhost_types.h"
> +#include "standard-headers/linux/virtio_ring.h"
> +
> +#include "qemu/event_notifier.h"
> +
> +typedef struct VhostShadowVirtqueue {
> +    EventNotifier hdev_notifier;
> +    VirtQueue *vq;
> +} VhostShadowVirtqueue;
> +
> +static inline bool vhost_vring_should_kick(VhostShadowVirtqueue *vq)
> +{
> +    return virtio_queue_get_used_notify_split(vq->vq);
> +}
> +
> +bool vhost_vring_kick(VhostShadowVirtqueue *vq)
> +{
> +    return vhost_vring_should_kick(vq) ? event_notifier_set(&vq->hdev_notifier)
> +                                       : true;
> +}

How is the return value used? event_notifier_set() returns -errno so
this function returns false on success, and true when notifications are
disabled or event_notifier_set() failed. I'm not sure this return value
can be used for anything.

> +
> +VhostShadowVirtqueue *vhost_sw_lm_shadow_vq(struct vhost_dev *dev, int idx)

I see now that this function allocates the VhostShadowVirtqueue. Maybe
adding _new() to the name would make that clear?

> +{
> +    struct vhost_vring_file file = {
> +        .index = idx
> +    };
> +    VirtQueue *vq = virtio_get_queue(dev->vdev, idx);
> +    VhostShadowVirtqueue *svq;
> +    int r;
> +
> +    svq = g_new0(VhostShadowVirtqueue, 1);
> +    svq->vq = vq;
> +
> +    r = event_notifier_init(&svq->hdev_notifier, 0);
> +    assert(r == 0);
> +
> +    file.fd = event_notifier_get_fd(&svq->hdev_notifier);
> +    r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
> +    assert(r == 0);
> +
> +    return svq;
> +}

I guess there are assumptions about the status of the device? Does the
virtqueue need to be disabled when this function is called?

> +
> +void vhost_sw_lm_shadow_vq_free(VhostShadowVirtqueue *vq)
> +{
> +    event_notifier_cleanup(&vq->hdev_notifier);
> +    g_free(vq);
> +}
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 9cbd52a7f1..a55b684b5f 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -13,6 +13,8 @@
>   * GNU GPL, version 2 or (at your option) any later version.
>   */
>  
> +#include "hw/virtio/vhost-sw-lm-ring.h"
> +
>  #include "qemu/osdep.h"
>  #include "qapi/error.h"
>  #include "hw/virtio/vhost.h"
> @@ -61,6 +63,20 @@ bool vhost_has_free_slot(void)
>      return slots_limit > used_memslots;
>  }
>  
> +static struct vhost_dev *vhost_dev_from_virtio(const VirtIODevice *vdev)
> +{
> +    struct vhost_dev *hdev;
> +
> +    QLIST_FOREACH(hdev, &vhost_devices, entry) {
> +        if (hdev->vdev == vdev) {
> +            return hdev;
> +        }
> +    }
> +
> +    assert(hdev);
> +    return NULL;
> +}
> +
>  static bool vhost_dev_can_log(const struct vhost_dev *hdev)
>  {
>      return hdev->features & (0x1ULL << VHOST_F_LOG_ALL);
> @@ -148,6 +164,12 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
>      return 0;
>  }
>  
> +static void vhost_log_sync_nop(MemoryListener *listener,
> +                               MemoryRegionSection *section)
> +{
> +    return;
> +}
> +
>  static void vhost_log_sync(MemoryListener *listener,
>                            MemoryRegionSection *section)
>  {
> @@ -928,6 +950,71 @@ static void vhost_log_global_stop(MemoryListener *listener)
>      }
>  }
>  
> +static void handle_sw_lm_vq(VirtIODevice *vdev, VirtQueue *vq)
> +{
> +    struct vhost_dev *hdev = vhost_dev_from_virtio(vdev);

If this lookup becomes a performance bottleneck there are other options
for determining the vhost_dev. For example VirtIODevice could have a
field for stashing the vhost_dev pointer.

> +    uint16_t idx = virtio_get_queue_index(vq);
> +
> +    VhostShadowVirtqueue *svq = hdev->sw_lm_shadow_vq[idx];
> +
> +    vhost_vring_kick(svq);
> +}

I'm a confused. Do we need to pop elements from vq and push equivalent
elements onto svq before kicking? Either a todo comment is missing or I
misunderstand how this works.

> +
> +static int vhost_sw_live_migration_stop(struct vhost_dev *dev)
> +{
> +    int idx;
> +
> +    vhost_dev_enable_notifiers(dev, dev->vdev);
> +    for (idx = 0; idx < dev->nvqs; ++idx) {
> +        vhost_sw_lm_shadow_vq_free(dev->sw_lm_shadow_vq[idx]);
> +    }
> +
> +    return 0;
> +}
> +
> +static int vhost_sw_live_migration_start(struct vhost_dev *dev)
> +{
> +    int idx;
> +
> +    for (idx = 0; idx < dev->nvqs; ++idx) {
> +        dev->sw_lm_shadow_vq[idx] = vhost_sw_lm_shadow_vq(dev, idx);
> +    }
> +
> +    vhost_dev_disable_notifiers(dev, dev->vdev);

There is a race condition if the guest kicks the vq while this is
happening. The shadow vq hdev_notifier needs to be set so the vhost
device checks the virtqueue for requests that slipped in during the
race window.

> +
> +    return 0;
> +}
> +
> +static int vhost_sw_live_migration_enable(struct vhost_dev *dev,
> +                                          bool enable_lm)
> +{
> +    if (enable_lm) {
> +        return vhost_sw_live_migration_start(dev);
> +    } else {
> +        return vhost_sw_live_migration_stop(dev);
> +    }
> +}
> +
> +static void vhost_sw_lm_global_start(MemoryListener *listener)
> +{
> +    int r;
> +
> +    r = vhost_migration_log(listener, true, vhost_sw_live_migration_enable);
> +    if (r < 0) {
> +        abort();
> +    }
> +}
> +
> +static void vhost_sw_lm_global_stop(MemoryListener *listener)
> +{
> +    int r;
> +
> +    r = vhost_migration_log(listener, false, vhost_sw_live_migration_enable);
> +    if (r < 0) {
> +        abort();
> +    }
> +}
> +
>  static void vhost_log_start(MemoryListener *listener,
>                              MemoryRegionSection *section,
>                              int old, int new)
> @@ -1334,9 +1421,14 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>          .region_nop = vhost_region_addnop,
>          .log_start = vhost_log_start,
>          .log_stop = vhost_log_stop,
> -        .log_sync = vhost_log_sync,
> -        .log_global_start = vhost_log_global_start,
> -        .log_global_stop = vhost_log_global_stop,
> +        .log_sync = !vhost_dev_can_log(hdev) ?
> +                    vhost_log_sync_nop :
> +                    vhost_log_sync,

Why is this change necessary now? It's not clear to me why it was
previously okay to call vhost_log_sync().

> +        .log_global_start = !vhost_dev_can_log(hdev) ?
> +                            vhost_sw_lm_global_start :
> +                            vhost_log_global_start,
> +        .log_global_stop = !vhost_dev_can_log(hdev) ? vhost_sw_lm_global_stop :
> +                                                      vhost_log_global_stop,
>          .eventfd_add = vhost_eventfd_add,
>          .eventfd_del = vhost_eventfd_del,
>          .priority = 10
> @@ -1364,6 +1456,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>              error_free(hdev->migration_blocker);
>              goto fail_busyloop;
>          }
> +    } else {
> +        hdev->sw_lm_vq_handler = handle_sw_lm_vq;
>      }
>  
>      hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
> diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
> index fbff9bc9d4..17419cb13e 100644
> --- a/hw/virtio/meson.build
> +++ b/hw/virtio/meson.build
> @@ -11,7 +11,7 @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
>  
>  virtio_ss = ss.source_set()
>  virtio_ss.add(files('virtio.c'))
> -virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c'))
> +virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-sw-lm-ring.c'))
>  virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
>  virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
>  virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
> -- 
> 2.18.4
>