All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com>
To: qemu-devel@nongnu.org
Cc: aarcange@redhat.com, yamahata@private.email.ne.jp,
	quintela@redhat.com, amit.shah@redhat.com, pbonzini@redhat.com,
	david@gibson.dropbear.id.au, yayanghy@cn.fujitsu.com
Subject: [Qemu-devel] [PATCH v6 16/47] Return path: Source handling of return path
Date: Tue, 14 Apr 2015 18:03:42 +0100	[thread overview]
Message-ID: <1429031053-4454-17-git-send-email-dgilbert@redhat.com> (raw)
In-Reply-To: <1429031053-4454-1-git-send-email-dgilbert@redhat.com>

From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

Open a return path, and handle messages that are received upon it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 include/migration/migration.h |   8 ++
 migration/migration.c         | 177 +++++++++++++++++++++++++++++++++++++++++-
 trace-events                  |  12 +++
 3 files changed, 196 insertions(+), 1 deletion(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 6300ec1..0719d82 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -73,6 +73,14 @@ struct MigrationState
 
     int state;
     MigrationParams params;
+
+    /* State related to return path */
+    struct {
+        QEMUFile     *file;
+        QemuThread    rp_thread;
+        bool          error;
+    } rp_state;
+
     double mbps;
     int64_t total_time;
     int64_t downtime;
diff --git a/migration/migration.c b/migration/migration.c
index db9471d..88355e2 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -243,6 +243,23 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
     return head;
 }
 
+/*
+ * Return true if we're already in the middle of a migration
+ * (i.e. any of the active or setup states)
+ */
+static bool migration_already_active(MigrationState *ms)
+{
+    switch (ms->state) {
+    case MIGRATION_STATUS_ACTIVE:
+    case MIGRATION_STATUS_SETUP:
+        return true;
+
+    default:
+        return false;
+
+    }
+}
+
 static void get_xbzrle_cache_stats(MigrationInfo *info)
 {
     if (migrate_use_xbzrle()) {
@@ -365,6 +382,21 @@ static void migrate_set_state(MigrationState *s, int old_state, int new_state)
     }
 }
 
+static void migrate_fd_cleanup_src_rp(MigrationState *ms)
+{
+    QEMUFile *rp = ms->rp_state.file;
+
+    /*
+     * When stuff goes wrong (e.g. failing destination) on the rp, it can get
+     * cleaned up from a few threads; make sure not to do it twice in parallel
+     */
+    rp = atomic_cmpxchg(&ms->rp_state.file, rp, NULL);
+    if (rp) {
+        trace_migrate_fd_cleanup_src_rp();
+        qemu_fclose(rp);
+    }
+}
+
 static void migrate_fd_cleanup(void *opaque)
 {
     MigrationState *s = opaque;
@@ -372,6 +404,8 @@ static void migrate_fd_cleanup(void *opaque)
     qemu_bh_delete(s->cleanup_bh);
     s->cleanup_bh = NULL;
 
+    migrate_fd_cleanup_src_rp(s);
+
     if (s->file) {
         trace_migrate_fd_cleanup();
         qemu_mutex_unlock_iothread();
@@ -410,6 +444,11 @@ static void migrate_fd_cancel(MigrationState *s)
     QEMUFile *f = migrate_get_current()->file;
     trace_migrate_fd_cancel();
 
+    if (s->rp_state.file) {
+        /* shutdown the rp socket, so causing the rp thread to shutdown */
+        qemu_file_shutdown(s->rp_state.file);
+    }
+
     do {
         old_state = s->state;
         if (old_state != MIGRATION_STATUS_SETUP &&
@@ -678,8 +717,144 @@ int64_t migrate_xbzrle_cache_size(void)
     return s->xbzrle_cache_size;
 }
 
-/* migration thread support */
+/*
+ * Something bad happened to the RP stream, mark an error
+ * The caller shall print something to indicate why
+ */
+static void source_return_path_bad(MigrationState *s)
+{
+    s->rp_state.error = true;
+    migrate_fd_cleanup_src_rp(s);
+}
+
+/*
+ * Handles messages sent on the return path towards the source VM
+ *
+ */
+static void *source_return_path_thread(void *opaque)
+{
+    MigrationState *ms = opaque;
+    QEMUFile *rp = ms->rp_state.file;
+    uint16_t expected_len, header_len, header_type;
+    const int max_len = 512;
+    uint8_t buf[max_len];
+    uint32_t tmp32;
+    int res;
+
+    trace_source_return_path_thread_entry();
+    while (rp && !qemu_file_get_error(rp) &&
+        migration_already_active(ms)) {
+        trace_source_return_path_thread_loop_top();
+        header_type = qemu_get_be16(rp);
+        header_len = qemu_get_be16(rp);
+
+        switch (header_type) {
+        case MIG_RP_MSG_SHUT:
+        case MIG_RP_MSG_PONG:
+            expected_len = 4;
+            break;
+
+        default:
+            error_report("RP: Received invalid message 0x%04x length 0x%04x",
+                    header_type, header_len);
+            source_return_path_bad(ms);
+            goto out;
+        }
+
+        if (header_len > expected_len) {
+            error_report("RP: Received message 0x%04x with"
+                    "incorrect length %d expecting %d",
+                    header_type, header_len,
+                    expected_len);
+            source_return_path_bad(ms);
+            goto out;
+        }
+
+        /* We know we've got a valid header by this point */
+        res = qemu_get_buffer(rp, buf, header_len);
+        if (res != header_len) {
+            trace_source_return_path_thread_failed_read_cmd_data();
+            source_return_path_bad(ms);
+            goto out;
+        }
+
+        /* OK, we have the message and the data */
+        switch (header_type) {
+        case MIG_RP_MSG_SHUT:
+            tmp32 = be32_to_cpup((uint32_t *)buf);
+            trace_source_return_path_thread_shut(tmp32);
+            if (tmp32) {
+                error_report("RP: Sibling indicated error %d", tmp32);
+                source_return_path_bad(ms);
+            }
+            /*
+             * We'll let the main thread deal with closing the RP
+             * we could do a shutdown(2) on it, but we're the only user
+             * anyway, so there's nothing gained.
+             */
+            goto out;
+
+        case MIG_RP_MSG_PONG:
+            tmp32 = be32_to_cpup((uint32_t *)buf);
+            trace_source_return_path_thread_pong(tmp32);
+            break;
+
+        default:
+            break;
+        }
+    }
+    if (rp && qemu_file_get_error(rp)) {
+        trace_source_return_path_thread_bad_end();
+        source_return_path_bad(ms);
+    }
+
+    trace_source_return_path_thread_end();
+out:
+    return NULL;
+}
+
+__attribute__ (( unused )) /* Until later in patch series */
+static int open_return_path_on_source(MigrationState *ms)
+{
+
+    ms->rp_state.file = qemu_file_get_return_path(ms->file);
+    if (!ms->rp_state.file) {
+        return -1;
+    }
+
+    trace_open_return_path_on_source();
+    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
+                       source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
 
+    trace_open_return_path_on_source_continue();
+
+    return 0;
+}
+
+__attribute__ (( unused )) /* Until later in patch series */
+/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
+static int await_return_path_close_on_source(MigrationState *ms)
+{
+    /*
+     * If this is a normal exit then the destination will send a SHUT and the
+     * rp_thread will exit, however if there's an error we need to cause
+     * it to exit, which we can do by a shutdown.
+     * (canceling must also shutdown to stop us getting stuck here if
+     * the destination died at just the wrong place)
+     */
+    if (qemu_file_get_error(ms->file) && ms->rp_state.file) {
+        qemu_file_shutdown(ms->rp_state.file);
+    }
+    trace_await_return_path_close_on_source_joining();
+    qemu_thread_join(&ms->rp_state.rp_thread);
+    trace_await_return_path_close_on_source_close();
+    return ms->rp_state.error;
+}
+
+/*
+ * Master migration thread on the source VM.
+ * It drives the migration and pumps the data down the outgoing channel.
+ */
 static void *migration_thread(void *opaque)
 {
     MigrationState *s = opaque;
diff --git a/trace-events b/trace-events
index 9f0a071..eb40e61 100644
--- a/trace-events
+++ b/trace-events
@@ -1378,12 +1378,24 @@ flic_no_device_api(int err) "flic: no Device Contral API support %d"
 flic_reset_failed(int err) "flic: reset failed %d"
 
 # migration.c
+await_return_path_close_on_source_close(void) ""
+await_return_path_close_on_source_joining(void) ""
 migrate_set_state(int new_state) "new state %d"
 migrate_fd_cleanup(void) ""
+migrate_fd_cleanup_src_rp(void) ""
 migrate_fd_error(void) ""
 migrate_fd_cancel(void) ""
 migrate_pending(uint64_t size, uint64_t max) "pending size %" PRIu64 " max %" PRIu64
 migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d"
+open_return_path_on_source(void) ""
+open_return_path_on_source_continue(void) ""
+source_return_path_thread_bad_end(void) ""
+source_return_path_thread_end(void) ""
+source_return_path_thread_entry(void) ""
+source_return_path_thread_failed_read_cmd_data(void) ""
+source_return_path_thread_loop_top(void) ""
+source_return_path_thread_pong(uint32_t val) "%x"
+source_return_path_thread_shut(uint32_t val) "%x"
 migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64
 
 # migration/rdma.c
-- 
2.1.0

  parent reply	other threads:[~2015-04-14 17:05 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-14 17:03 [Qemu-devel] [PATCH v6 00/47] Postcopy implementation Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 01/47] Start documenting how postcopy works Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 02/47] Split header writing out of qemu_savevm_state_begin Dr. David Alan Gilbert (git)
2015-05-11 11:16   ` Amit Shah
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 03/47] qemu_ram_foreach_block: pass up error value, and down the ramblock name Dr. David Alan Gilbert (git)
2015-05-15 10:38   ` Amit Shah
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 04/47] Add qemu_get_counted_string to read a string prefixed by a count byte Dr. David Alan Gilbert (git)
2015-05-15 13:50   ` Amit Shah
2015-05-15 14:06     ` Dr. David Alan Gilbert
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 05/47] Create MigrationIncomingState Dr. David Alan Gilbert (git)
2015-05-18  6:58   ` Amit Shah
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 06/47] Provide runtime Target page information Dr. David Alan Gilbert (git)
2015-05-18  7:06   ` Amit Shah
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 07/47] Move copy out of qemu_peek_buffer Dr. David Alan Gilbert (git)
2015-05-21  6:47   ` Amit Shah
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 08/47] Add qemu_get_buffer_less_copy to avoid copies some of the time Dr. David Alan Gilbert (git)
2015-05-21  7:09   ` Amit Shah
2015-05-21  8:45     ` Dr. David Alan Gilbert
2015-05-21  8:58       ` Amit Shah
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 09/47] Add wrapper for setting blocking status on a QEMUFile Dr. David Alan Gilbert (git)
2015-05-18  7:35   ` Amit Shah
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 10/47] Rename save_live_complete to save_live_complete_precopy Dr. David Alan Gilbert (git)
2015-05-18  7:35   ` Amit Shah
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 11/47] Return path: Open a return path on QEMUFile for sockets Dr. David Alan Gilbert (git)
2015-06-10  9:00   ` Amit Shah
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 12/47] Return path: socket_writev_buffer: Block even on non-blocking fd's Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 13/47] Migration commands Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 14/47] Return path: Control commands Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 15/47] Return path: Send responses from destination to source Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` Dr. David Alan Gilbert (git) [this message]
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 17/47] ram_debug_dump_bitmap: Dump a migration bitmap as text Dr. David Alan Gilbert (git)
2015-05-21  9:21   ` Amit Shah
2015-05-21 10:10     ` Dr. David Alan Gilbert
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 18/47] Move loadvm_handlers into MigrationIncomingState Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 19/47] Rework loadvm path for subloops Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 20/47] Add migration-capability boolean for postcopy-ram Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 21/47] Add wrappers and handlers for sending/receiving the postcopy-ram migration messages Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 22/47] MIG_CMD_PACKAGED: Send a packaged chunk of migration stream Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 23/47] migrate_init: Call from savevm Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 24/47] Modify save_live_pending for postcopy Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 25/47] postcopy: OS support test Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 26/47] migrate_start_postcopy: Command to trigger transition to postcopy Dr. David Alan Gilbert (git)
2015-04-14 17:38   ` Eric Blake
2015-04-14 17:40     ` Dr. David Alan Gilbert
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 27/47] MIGRATION_STATUS_POSTCOPY_ACTIVE: Add new migration state Dr. David Alan Gilbert (git)
2015-04-14 17:40   ` Eric Blake
2015-04-14 18:00     ` Dr. David Alan Gilbert
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 28/47] Add qemu_savevm_state_complete_postcopy Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 29/47] Postcopy: Maintain sentmap and calculate discard Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 30/47] postcopy: Incoming initialisation Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 31/47] postcopy: ram_enable_notify to switch on userfault Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 32/47] Postcopy: Postcopy startup in migration thread Dr. David Alan Gilbert (git)
2015-04-14 17:03 ` [Qemu-devel] [PATCH v6 33/47] Postcopy end in migration_thread Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 34/47] Page request: Add MIG_RP_MSG_REQ_PAGES reverse command Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 35/47] Page request: Process incoming page request Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 36/47] Page request: Consume pages off the post-copy queue Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 37/47] postcopy_ram.c: place_page and helpers Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 38/47] Postcopy: Use helpers to map pages during migration Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 39/47] qemu_ram_block_from_host Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 40/47] Don't sync dirty bitmaps in postcopy Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 41/47] Host page!=target page: Cleanup bitmaps Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 42/47] Postcopy; Handle userfault requests Dr. David Alan Gilbert (git)
2015-05-25  9:18   ` zhanghailiang
2015-05-26  9:50     ` Dr. David Alan Gilbert
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 43/47] Start up a postcopy/listener thread ready for incoming page data Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 44/47] postcopy: Wire up loadvm_postcopy_handle_ commands Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 45/47] End of migration for postcopy Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 46/47] Disable mlock around incoming postcopy Dr. David Alan Gilbert (git)
2015-04-14 17:04 ` [Qemu-devel] [PATCH v6 47/47] Inhibit ballooning during postcopy Dr. David Alan Gilbert (git)
2015-04-27  8:04 ` [Qemu-devel] [PATCH v6 00/47] Postcopy implementation Li, Liang Z
2015-04-29 17:23   ` Dr. David Alan Gilbert
2015-04-30  1:09     ` Li, Liang Z
     [not found]       ` <20150505150112.GM2126@work-vm>
     [not found]         ` <F2CBF3009FA73547804AE4C663CAB28E50F0E1@shsmsx102.ccr.corp.intel.com>
     [not found]           ` <20150506083056.GB2204@work-vm>
2015-05-07  1:21             ` Li, Liang Z
2015-05-07  8:01               ` Dr. David Alan Gilbert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1429031053-4454-17-git-send-email-dgilbert@redhat.com \
    --to=dgilbert@redhat.com \
    --cc=aarcange@redhat.com \
    --cc=amit.shah@redhat.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=yamahata@private.email.ne.jp \
    --cc=yayanghy@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.