All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com>
To: qemu-devel@nongnu.org, quintela@redhat.com, peterx@redhat.com,
	marcel.apfelbaum@gmail.com, wei.w.wang@intel.com,
	yury-kotov@yandex-team.ru, chen.zhang@intel.com
Subject: [Qemu-devel] [PULL 18/21] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
Date: Tue,  5 Mar 2019 18:15:59 +0000	[thread overview]
Message-ID: <20190305181602.9051-19-dgilbert@redhat.com> (raw)
In-Reply-To: <20190305181602.9051-1-dgilbert@redhat.com>

From: Wei Wang <wei.w.wang@intel.com>

The new feature enables the virtio-balloon device to receive hints of
guest free pages from the free page vq.

A notifier is registered to the migration precopy notifier chain. The
notifier calls free_page_start after the migration thread syncs the dirty
bitmap, so that the free page optimization starts to clear bits of free
pages from the bitmap. It calls the free_page_stop before the migration
thread syncs the bitmap, which is the end of the current round of ram
save. The free_page_stop is also called to stop the optimization in the
case when there is an error occurred in the process of ram saving.

Note: balloon will report pages which were free at the time of this call.
As the reporting happens asynchronously, dirty bit logging must be
enabled before this free_page_start call is made. Guest reporting must be
disabled before the migration dirty bitmap is synchronized.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Peter Xu <peterx@redhat.com>
Message-Id: <1544516693-5395-8-git-send-email-wei.w.wang@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
  dgilbert: Dropped kernel header update, fixed up CMD_ID_* name change
---
 hw/virtio/virtio-balloon.c         | 263 +++++++++++++++++++++++++++++
 include/hw/virtio/virtio-balloon.h |  28 ++-
 2 files changed, 290 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index d3f2913a85..e3a65940ef 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -27,6 +27,7 @@
 #include "qapi/visitor.h"
 #include "trace.h"
 #include "qemu/error-report.h"
+#include "migration/misc.h"
 
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
@@ -378,6 +379,184 @@ out:
     }
 }
 
+static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev,
+                                               VirtQueue *vq)
+{
+    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+    qemu_bh_schedule(s->free_page_bh);
+}
+
+static bool get_free_page_hints(VirtIOBalloon *dev)
+{
+    VirtQueueElement *elem;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtQueue *vq = dev->free_page_vq;
+
+    while (dev->block_iothread) {
+        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
+    }
+
+    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+    if (!elem) {
+        return false;
+    }
+
+    if (elem->out_num) {
+        uint32_t id;
+        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
+                                 &id, sizeof(id));
+        virtqueue_push(vq, elem, size);
+        g_free(elem);
+
+        virtio_tswap32s(vdev, &id);
+        if (unlikely(size != sizeof(id))) {
+            virtio_error(vdev, "received an incorrect cmd id");
+            return false;
+        }
+        if (id == dev->free_page_report_cmd_id) {
+            dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
+        } else {
+            /*
+             * Stop the optimization only when it has started. This
+             * avoids a stale stop sign for the previous command.
+             */
+            if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
+                dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
+            }
+        }
+    }
+
+    if (elem->in_num) {
+        if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
+            qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
+                                      elem->in_sg[0].iov_len);
+        }
+        virtqueue_push(vq, elem, 1);
+        g_free(elem);
+    }
+
+    return true;
+}
+
+static void virtio_ballloon_get_free_page_hints(void *opaque)
+{
+    VirtIOBalloon *dev = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtQueue *vq = dev->free_page_vq;
+    bool continue_to_get_hints;
+
+    do {
+        qemu_mutex_lock(&dev->free_page_lock);
+        virtio_queue_set_notification(vq, 0);
+        continue_to_get_hints = get_free_page_hints(dev);
+        qemu_mutex_unlock(&dev->free_page_lock);
+        virtio_notify(vdev, vq);
+      /*
+       * Start to poll the vq once the reporting started. Otherwise, continue
+       * only when there are entries on the vq, which need to be given back.
+       */
+    } while (continue_to_get_hints ||
+             dev->free_page_report_status == FREE_PAGE_REPORT_S_START);
+    virtio_queue_set_notification(vq, 1);
+}
+
+static bool virtio_balloon_free_page_support(void *opaque)
+{
+    VirtIOBalloon *s = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
+}
+
+static void virtio_balloon_free_page_start(VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    /* For the stop and copy phase, we don't need to start the optimization */
+    if (!vdev->vm_running) {
+        return;
+    }
+
+    if (s->free_page_report_cmd_id == UINT_MAX) {
+        s->free_page_report_cmd_id =
+                       VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
+    } else {
+        s->free_page_report_cmd_id++;
+    }
+
+    s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED;
+    virtio_notify_config(vdev);
+}
+
+static void virtio_balloon_free_page_stop(VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    if (s->free_page_report_status != FREE_PAGE_REPORT_S_STOP) {
+        /*
+         * The lock also guarantees us that the
+         * virtio_ballloon_get_free_page_hints exits after the
+         * free_page_report_status is set to S_STOP.
+         */
+        qemu_mutex_lock(&s->free_page_lock);
+        /*
+         * The guest hasn't done the reporting, so host sends a notification
+         * to the guest to actively stop the reporting.
+         */
+        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
+        qemu_mutex_unlock(&s->free_page_lock);
+        virtio_notify_config(vdev);
+    }
+}
+
+static void virtio_balloon_free_page_done(VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    s->free_page_report_status = FREE_PAGE_REPORT_S_DONE;
+    virtio_notify_config(vdev);
+}
+
+static int
+virtio_balloon_free_page_report_notify(NotifierWithReturn *n, void *data)
+{
+    VirtIOBalloon *dev = container_of(n, VirtIOBalloon,
+                                      free_page_report_notify);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    PrecopyNotifyData *pnd = data;
+
+    if (!virtio_balloon_free_page_support(dev)) {
+        /*
+         * This is an optimization provided to migration, so just return 0 to
+         * have the normal migration process not affected when this feature is
+         * not supported.
+         */
+        return 0;
+    }
+
+    switch (pnd->reason) {
+    case PRECOPY_NOTIFY_SETUP:
+        precopy_enable_free_page_optimization();
+        break;
+    case PRECOPY_NOTIFY_COMPLETE:
+    case PRECOPY_NOTIFY_CLEANUP:
+    case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
+        virtio_balloon_free_page_stop(dev);
+        break;
+    case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
+        if (vdev->vm_running) {
+            virtio_balloon_free_page_start(dev);
+        } else {
+            virtio_balloon_free_page_done(dev);
+        }
+        break;
+    default:
+        virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason);
+    }
+
+    return 0;
+}
+
 static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
 {
     VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
@@ -386,6 +565,17 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
     config.num_pages = cpu_to_le32(dev->num_pages);
     config.actual = cpu_to_le32(dev->actual);
 
+    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
+        config.free_page_report_cmd_id =
+                       cpu_to_le32(dev->free_page_report_cmd_id);
+    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
+        config.free_page_report_cmd_id =
+                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
+    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
+        config.free_page_report_cmd_id =
+                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
+    }
+
     trace_virtio_balloon_get_config(config.num_pages, config.actual);
     memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
 }
@@ -446,6 +636,7 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
     VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
     f |= dev->host_features;
     virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
+
     return f;
 }
 
@@ -482,6 +673,18 @@ static int virtio_balloon_post_load_device(void *opaque, int version_id)
     return 0;
 }
 
+static const VMStateDescription vmstate_virtio_balloon_free_page_report = {
+    .name = "virtio-balloon-device/free-page-report",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = virtio_balloon_free_page_support,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon),
+        VMSTATE_UINT32(free_page_report_status, VirtIOBalloon),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_virtio_balloon_device = {
     .name = "virtio-balloon-device",
     .version_id = 1,
@@ -492,6 +695,10 @@ static const VMStateDescription vmstate_virtio_balloon_device = {
         VMSTATE_UINT32(actual, VirtIOBalloon),
         VMSTATE_END_OF_LIST()
     },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_virtio_balloon_free_page_report,
+        NULL
+    }
 };
 
 static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
@@ -516,6 +723,29 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
     s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
     s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
 
+    if (virtio_has_feature(s->host_features,
+                           VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+        s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
+                                           virtio_balloon_handle_free_page_vq);
+        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
+        s->free_page_report_cmd_id =
+                           VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
+        s->free_page_report_notify.notify =
+                                       virtio_balloon_free_page_report_notify;
+        precopy_add_notifier(&s->free_page_report_notify);
+        if (s->iothread) {
+            object_ref(OBJECT(s->iothread));
+            s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
+                                       virtio_ballloon_get_free_page_hints, s);
+            qemu_mutex_init(&s->free_page_lock);
+            qemu_cond_init(&s->free_page_cond);
+            s->block_iothread = false;
+        } else {
+            /* Simply disable this feature if the iothread wasn't created. */
+            s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT);
+            virtio_error(vdev, "iothread is missing");
+        }
+    }
     reset_stats(s);
 }
 
@@ -524,6 +754,11 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOBalloon *s = VIRTIO_BALLOON(dev);
 
+    if (virtio_balloon_free_page_support(s)) {
+        qemu_bh_delete(s->free_page_bh);
+        virtio_balloon_free_page_stop(s);
+        precopy_remove_notifier(&s->free_page_report_notify);
+    }
     balloon_stats_destroy_timer(s);
     qemu_remove_balloon_handler(s);
     virtio_cleanup(vdev);
@@ -533,6 +768,10 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev)
 {
     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
 
+    if (virtio_balloon_free_page_support(s)) {
+        virtio_balloon_free_page_stop(s);
+    }
+
     if (s->stats_vq_elem != NULL) {
         virtqueue_unpop(s->svq, s->stats_vq_elem, 0);
         g_free(s->stats_vq_elem);
@@ -550,6 +789,26 @@ static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
          * was stopped */
         virtio_balloon_receive_stats(vdev, s->svq);
     }
+
+    if (virtio_balloon_free_page_support(s)) {
+        /*
+         * The VM is woken up and the iothread was blocked, so signal it to
+         * continue.
+         */
+        if (vdev->vm_running && s->block_iothread) {
+            qemu_mutex_lock(&s->free_page_lock);
+            s->block_iothread = false;
+            qemu_cond_signal(&s->free_page_cond);
+            qemu_mutex_unlock(&s->free_page_lock);
+        }
+
+        /* The VM is stopped, block the iothread. */
+        if (!vdev->vm_running) {
+            qemu_mutex_lock(&s->free_page_lock);
+            s->block_iothread = true;
+            qemu_mutex_unlock(&s->free_page_lock);
+        }
+    }
 }
 
 static void virtio_balloon_instance_init(Object *obj)
@@ -578,6 +837,10 @@ static const VMStateDescription vmstate_virtio_balloon = {
 static Property virtio_balloon_properties[] = {
     DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
                     VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
+    DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
+                    VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
+    DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD,
+                     IOThread *),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index 99dcd6d105..1afafb12f6 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -17,11 +17,14 @@
 
 #include "standard-headers/linux/virtio_balloon.h"
 #include "hw/virtio/virtio.h"
+#include "sysemu/iothread.h"
 
 #define TYPE_VIRTIO_BALLOON "virtio-balloon-device"
 #define VIRTIO_BALLOON(obj) \
         OBJECT_CHECK(VirtIOBalloon, (obj), TYPE_VIRTIO_BALLOON)
 
+#define VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN 0x80000000
+
 typedef struct virtio_balloon_stat VirtIOBalloonStat;
 
 typedef struct virtio_balloon_stat_modern {
@@ -32,15 +35,38 @@ typedef struct virtio_balloon_stat_modern {
 
 typedef struct PartiallyBalloonedPage PartiallyBalloonedPage;
 
+enum virtio_balloon_free_page_report_status {
+    FREE_PAGE_REPORT_S_STOP = 0,
+    FREE_PAGE_REPORT_S_REQUESTED = 1,
+    FREE_PAGE_REPORT_S_START = 2,
+    FREE_PAGE_REPORT_S_DONE = 3,
+};
+
 typedef struct VirtIOBalloon {
     VirtIODevice parent_obj;
-    VirtQueue *ivq, *dvq, *svq;
+    VirtQueue *ivq, *dvq, *svq, *free_page_vq;
+    uint32_t free_page_report_status;
     uint32_t num_pages;
     uint32_t actual;
+    uint32_t free_page_report_cmd_id;
     uint64_t stats[VIRTIO_BALLOON_S_NR];
     VirtQueueElement *stats_vq_elem;
     size_t stats_vq_offset;
     QEMUTimer *stats_timer;
+    IOThread *iothread;
+    QEMUBH *free_page_bh;
+    /*
+     * Lock to synchronize threads to access the free page reporting related
+     * fields (e.g. free_page_report_status).
+     */
+    QemuMutex free_page_lock;
+    QemuCond  free_page_cond;
+    /*
+     * Set to block iothread to continue reading free page hints as the VM is
+     * stopped.
+     */
+    bool block_iothread;
+    NotifierWithReturn free_page_report_notify;
     int64_t stats_last_update;
     int64_t stats_poll_interval;
     uint32_t host_features;
-- 
2.20.1

  parent reply	other threads:[~2019-03-05 18:16 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-05 18:15 [Qemu-devel] [PULL 00/21] migration queue Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 01/21] migration: Fix cancel state Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 02/21] migration/rdma: Fix qemu_rdma_cleanup null check Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 03/21] migration/rdma: clang compilation fix Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 04/21] exec: Change RAMBlockIterFunc definition Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 05/21] migration: Introduce ignore-shared capability Dr. David Alan Gilbert (git)
2019-03-09 17:25   ` Markus Armbruster
2019-03-05 18:15 ` [Qemu-devel] [PULL 06/21] migration: Add an ability to ignore shared RAM blocks Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 07/21] tests/migration-test: Add a test for ignore-shared capability Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 08/21] migration: Add capabilities validation Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 09/21] tests: Add migration xbzrle test Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 10/21] migration: Create socket-address parameter Dr. David Alan Gilbert (git)
2019-03-05 18:47   ` Eric Blake
2019-03-06  5:21     ` Markus Armbruster
2019-03-06 10:40       ` Dr. David Alan Gilbert
2019-03-05 18:15 ` [Qemu-devel] [PULL 11/21] tests: Add basic migration precopy tcp test Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 12/21] bitmap: fix bitmap_count_one Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 13/21] bitmap: bitmap_count_one_with_offset Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 14/21] migration: use bitmap_mutex in migration_bitmap_clear_dirty Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 15/21] migration: API to clear bits of guest free pages from the dirty bitmap Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 16/21] migration/ram.c: add a notifier chain for precopy Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` [Qemu-devel] [PULL 17/21] migration/ram.c: add the free page optimization enable flag Dr. David Alan Gilbert (git)
2019-03-05 18:15 ` Dr. David Alan Gilbert (git) [this message]
2019-03-05 18:16 ` [Qemu-devel] [PULL 19/21] Migration/colo.c: Fix double close bug when occur COLO failover Dr. David Alan Gilbert (git)
2019-03-05 18:16 ` [Qemu-devel] [PULL 20/21] Migration/colo.c: Make COLO node running after failover Dr. David Alan Gilbert (git)
2019-03-05 18:16 ` [Qemu-devel] [PULL 21/21] qapi/migration.json: Remove a variable that doesn't exist in example Dr. David Alan Gilbert (git)
2019-03-05 18:47 ` [Qemu-devel] [PULL 00/21] migration queue no-reply
2019-03-05 18:49 ` Eric Blake
2019-03-05 18:51 ` no-reply
2019-03-06 11:17 ` Philippe Mathieu-Daudé
2019-03-06 12:52   ` Markus Armbruster

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190305181602.9051-19-dgilbert@redhat.com \
    --to=dgilbert@redhat.com \
    --cc=chen.zhang@intel.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=wei.w.wang@intel.com \
    --cc=yury-kotov@yandex-team.ru \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.