All of lore.kernel.org
 help / color / mirror / Atom feed
From: Juan Quintela <quintela@redhat.com>
To: qemu-devel@nongnu.org
Cc: "Paolo Bonzini" <pbonzini@redhat.com>,
	"David Hildenbrand" <david@redhat.com>,
	"Laurent Vivier" <laurent@vivier.eu>,
	"Stefan Hajnoczi" <stefanha@redhat.com>,
	"Fam Zheng" <fam@euphon.net>,
	qemu-block@nongnu.org,
	"Dr. David Alan Gilbert" <dgilbert@redhat.com>,
	"Thomas Huth" <thuth@redhat.com>,
	"Philippe Mathieu-Daudé" <philmd@linaro.org>,
	qemu-trivial@nongnu.org, "Michael Tokarev" <mjt@tls.msk.ru>,
	"Daniel P. Berrangé" <berrange@redhat.com>,
	"Marc-André Lureau" <marcandre.lureau@redhat.com>,
	"Peter Xu" <peterx@redhat.com>,
	"Juan Quintela" <quintela@redhat.com>
Subject: [PATCH 27/30] migration: Send requested page directly in rp-return thread
Date: Tue, 15 Nov 2022 13:12:23 +0100	[thread overview]
Message-ID: <20221115121226.26609-28-quintela@redhat.com> (raw)
In-Reply-To: <20221115121226.26609-1-quintela@redhat.com>

From: Peter Xu <peterx@redhat.com>

With all the facilities ready, send the requested page directly in the
rp-return thread rather than queuing it in the request queue, if and only
if postcopy preempt is enabled.  It can achieve so because it uses separate
channel for sending urgent pages.  The only shared data is bitmap and it's
protected by the bitmap_mutex.

Note that since we're moving the ownership of the urgent channel from the
migration thread to rp thread it also means the rp thread is responsible
for managing the qemufile, e.g. properly close it when pausing migration
happens.  For this, let migration_release_from_dst_file to cover shutdown
of the urgent channel too, renaming it as migration_release_dst_files() to
better show what it does.

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 migration/migration.c |  35 +++++++------
 migration/ram.c       | 112 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 131 insertions(+), 16 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 1f95877fb4..42f36c1e2c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2868,8 +2868,11 @@ static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
     return 0;
 }
 
-/* Release ms->rp_state.from_dst_file in a safe way */
-static void migration_release_from_dst_file(MigrationState *ms)
+/*
+ * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if
+ * existed) in a safe way.
+ */
+static void migration_release_dst_files(MigrationState *ms)
 {
     QEMUFile *file;
 
@@ -2882,6 +2885,18 @@ static void migration_release_from_dst_file(MigrationState *ms)
         ms->rp_state.from_dst_file = NULL;
     }
 
+    /*
+     * Do the same to postcopy fast path socket too if there is.  No
+     * locking needed because this qemufile should only be managed by
+     * return path thread.
+     */
+    if (ms->postcopy_qemufile_src) {
+        migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src);
+        qemu_file_shutdown(ms->postcopy_qemufile_src);
+        qemu_fclose(ms->postcopy_qemufile_src);
+        ms->postcopy_qemufile_src = NULL;
+    }
+
     qemu_fclose(file);
 }
 
@@ -3026,7 +3041,7 @@ out:
              * Maybe there is something we can do: it looks like a
              * network down issue, and we pause for a recovery.
              */
-            migration_release_from_dst_file(ms);
+            migration_release_dst_files(ms);
             rp = NULL;
             if (postcopy_pause_return_path_thread(ms)) {
                 /*
@@ -3044,7 +3059,7 @@ out:
     }
 
     trace_source_return_path_thread_end();
-    migration_release_from_dst_file(ms);
+    migration_release_dst_files(ms);
     rcu_unregister_thread();
     return NULL;
 }
@@ -3567,18 +3582,6 @@ static MigThrError postcopy_pause(MigrationState *s)
         qemu_file_shutdown(file);
         qemu_fclose(file);
 
-        /*
-         * Do the same to postcopy fast path socket too if there is.  No
-         * locking needed because no racer as long as we do this before setting
-         * status to paused.
-         */
-        if (s->postcopy_qemufile_src) {
-            migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
-            qemu_file_shutdown(s->postcopy_qemufile_src);
-            qemu_fclose(s->postcopy_qemufile_src);
-            s->postcopy_qemufile_src = NULL;
-        }
-
         migrate_set_state(&s->state, s->state,
                           MIGRATION_STATUS_POSTCOPY_PAUSED);
 
diff --git a/migration/ram.c b/migration/ram.c
index dbdde5a6a5..5dc221a2fc 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -574,6 +574,8 @@ static QemuThread *decompress_threads;
 static QemuMutex decomp_done_lock;
 static QemuCond decomp_done_cond;
 
+static int ram_save_host_page_urgent(PageSearchStatus *pss);
+
 static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                  ram_addr_t offset, uint8_t *source_buf);
 
@@ -588,6 +590,16 @@ static void pss_init(PageSearchStatus *pss, RAMBlock *rb, ram_addr_t page)
     pss->complete_round = false;
 }
 
+/*
+ * Check whether two PSSs are actively sending the same page.  Return true
+ * if it is, false otherwise.
+ */
+static bool pss_overlap(PageSearchStatus *pss1, PageSearchStatus *pss2)
+{
+    return pss1->host_page_sending && pss2->host_page_sending &&
+        (pss1->host_page_start == pss2->host_page_start);
+}
+
 static void *do_data_compress(void *opaque)
 {
     CompressParam *param = opaque;
@@ -2288,6 +2300,57 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
         return -1;
     }
 
+    /*
+     * When with postcopy preempt, we send back the page directly in the
+     * rp-return thread.
+     */
+    if (postcopy_preempt_active()) {
+        ram_addr_t page_start = start >> TARGET_PAGE_BITS;
+        size_t page_size = qemu_ram_pagesize(ramblock);
+        PageSearchStatus *pss = &ram_state->pss[RAM_CHANNEL_POSTCOPY];
+        int ret = 0;
+
+        qemu_mutex_lock(&rs->bitmap_mutex);
+
+        pss_init(pss, ramblock, page_start);
+        /*
+         * Always use the preempt channel, and make sure it's there.  It's
+         * safe to access without lock, because when rp-thread is running
+         * we should be the only one who operates on the qemufile
+         */
+        pss->pss_channel = migrate_get_current()->postcopy_qemufile_src;
+        pss->postcopy_requested = true;
+        assert(pss->pss_channel);
+
+        /*
+         * It must be either one or multiple of host page size.  Just
+         * assert; if something wrong we're mostly split brain anyway.
+         */
+        assert(len % page_size == 0);
+        while (len) {
+            if (ram_save_host_page_urgent(pss)) {
+                error_report("%s: ram_save_host_page_urgent() failed: "
+                             "ramblock=%s, start_addr=0x"RAM_ADDR_FMT,
+                             __func__, ramblock->idstr, start);
+                ret = -1;
+                break;
+            }
+            /*
+             * NOTE: after ram_save_host_page_urgent() succeeded, pss->page
+             * will automatically be moved and point to the next host page
+             * we're going to send, so no need to update here.
+             *
+             * Normally QEMU never sends >1 host page in requests, so
+             * logically we don't even need that as the loop should only
+             * run once, but just to be consistent.
+             */
+            len -= page_size;
+        };
+        qemu_mutex_unlock(&rs->bitmap_mutex);
+
+        return ret;
+    }
+
     struct RAMSrcPageRequest *new_entry =
         g_new0(struct RAMSrcPageRequest, 1);
     new_entry->rb = ramblock;
@@ -2565,6 +2628,55 @@ static void pss_host_page_finish(PageSearchStatus *pss)
     pss->host_page_start = pss->host_page_end = 0;
 }
 
+/*
+ * Send an urgent host page specified by `pss'.  Need to be called with
+ * bitmap_mutex held.
+ *
+ * Returns 0 if save host page succeeded, false otherwise.
+ */
+static int ram_save_host_page_urgent(PageSearchStatus *pss)
+{
+    bool page_dirty, sent = false;
+    RAMState *rs = ram_state;
+    int ret = 0;
+
+    trace_postcopy_preempt_send_host_page(pss->block->idstr, pss->page);
+    pss_host_page_prepare(pss);
+
+    /*
+     * If precopy is sending the same page, let it be done in precopy, or
+     * we could send the same page in two channels and none of them will
+     * receive the whole page.
+     */
+    if (pss_overlap(pss, &ram_state->pss[RAM_CHANNEL_PRECOPY])) {
+        trace_postcopy_preempt_hit(pss->block->idstr,
+                                   pss->page << TARGET_PAGE_BITS);
+        return 0;
+    }
+
+    do {
+        page_dirty = migration_bitmap_clear_dirty(rs, pss->block, pss->page);
+
+        if (page_dirty) {
+            /* Be strict to return code; it must be 1, or what else? */
+            if (ram_save_target_page(rs, pss) != 1) {
+                error_report_once("%s: ram_save_target_page failed", __func__);
+                ret = -1;
+                goto out;
+            }
+            sent = true;
+        }
+        pss_find_next_dirty(pss);
+    } while (pss_within_range(pss));
+out:
+    pss_host_page_finish(pss);
+    /* For urgent requests, flush immediately if sent */
+    if (sent) {
+        qemu_fflush(pss->pss_channel);
+    }
+    return ret;
+}
+
 /**
  * ram_save_host_page: save a whole host page
  *
-- 
2.38.1



  parent reply	other threads:[~2022-11-15 12:29 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-15 12:11 [PATCH 00/30] Migration PULL request Juan Quintela
2022-11-15 12:11 ` [PATCH 01/30] migration/channel-block: fix return value for qio_channel_block_{readv, writev} Juan Quintela
2022-11-15 12:11 ` [PATCH 02/30] migration/multifd/zero-copy: Create helper function for flushing Juan Quintela
2022-11-15 12:11 ` [PATCH 03/30] migration: check magic value for deciding the mapping of channels Juan Quintela
2022-11-15 12:12 ` [PATCH 04/30] multifd: Create page_size fields into both MultiFD{Recv, Send}Params Juan Quintela
2022-11-15 12:12 ` [PATCH 05/30] multifd: Create page_count " Juan Quintela
2022-11-15 12:12 ` [PATCH 06/30] migration: Export ram_transferred_ram() Juan Quintela
2022-11-15 12:12 ` [PATCH 07/30] migration: Export ram_release_page() Juan Quintela
2022-11-15 12:12 ` [PATCH 08/30] Update AVX512 support for xbzrle_encode_buffer Juan Quintela
2022-11-15 12:12 ` [PATCH 09/30] Unit test code and benchmark code Juan Quintela
2022-11-15 12:12 ` [PATCH 10/30] migration: Fix possible infinite loop of ram save process Juan Quintela
2022-11-15 12:12 ` [PATCH 11/30] migration: Fix race on qemu_file_shutdown() Juan Quintela
2022-11-15 12:12 ` [PATCH 12/30] migration: Disallow postcopy preempt to be used with compress Juan Quintela
2022-11-15 12:12 ` [PATCH 13/30] migration: Use non-atomic ops for clear log bitmap Juan Quintela
2022-11-15 12:12 ` [PATCH 14/30] migration: Disable multifd explicitly with compression Juan Quintela
2022-11-15 12:12 ` [PATCH 15/30] migration: Take bitmap mutex when completing ram migration Juan Quintela
2022-11-15 12:12 ` [PATCH 16/30] migration: Add postcopy_preempt_active() Juan Quintela
2022-11-15 12:12 ` [PATCH 17/30] migration: Cleanup xbzrle zero page cache update logic Juan Quintela
2022-11-15 12:12 ` [PATCH 18/30] migration: Trivial cleanup save_page_header() on same block check Juan Quintela
2022-11-15 12:12 ` [PATCH 19/30] migration: Remove RAMState.f references in compression code Juan Quintela
2022-11-15 12:12 ` [PATCH 20/30] migration: Yield bitmap_mutex properly when sending/sleeping Juan Quintela
2022-11-15 12:12 ` [PATCH 21/30] migration: Use atomic ops properly for page accountings Juan Quintela
2022-11-15 12:12 ` [PATCH 22/30] migration: Teach PSS about host page Juan Quintela
2022-11-15 12:12 ` [PATCH 23/30] migration: Introduce pss_channel Juan Quintela
2022-11-15 12:12 ` [PATCH 24/30] migration: Add pss_init() Juan Quintela
2022-11-15 12:12 ` [PATCH 25/30] migration: Make PageSearchStatus part of RAMState Juan Quintela
2022-11-15 12:12 ` [PATCH 26/30] migration: Move last_sent_block into PageSearchStatus Juan Quintela
2022-11-15 12:12 ` Juan Quintela [this message]
2022-11-15 12:12 ` [PATCH 28/30] migration: Remove old preempt code around state maintainance Juan Quintela
2022-11-15 12:12 ` [PATCH 29/30] migration: Drop rs->f Juan Quintela
2022-11-15 12:12 ` [PATCH 30/30] migration: Block migration comment or code is wrong Juan Quintela
2022-11-15 14:55 ` [PATCH 00/30] Migration PULL request Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221115121226.26609-28-quintela@redhat.com \
    --to=quintela@redhat.com \
    --cc=berrange@redhat.com \
    --cc=david@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=fam@euphon.net \
    --cc=laurent@vivier.eu \
    --cc=marcandre.lureau@redhat.com \
    --cc=mjt@tls.msk.ru \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=philmd@linaro.org \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-trivial@nongnu.org \
    --cc=stefanha@redhat.com \
    --cc=thuth@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.