All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zhimin Feng <fengzhimin1@huawei.com>
To: <quintela@redhat.com>, <dgilbert@redhat.com>, <armbru@redhat.com>,
	<eblake@redhat.com>
Cc: jemmy858585@gmail.com, fengzhimin <fengzhimin1@huawei.com>,
	qemu-devel@nongnu.org, zhang.zhanghailiang@huawei.com
Subject: [PATCH RFC 08/12] migration/rdma: register memory for multiRDMA channels
Date: Thu, 9 Jan 2020 12:59:18 +0800	[thread overview]
Message-ID: <20200109045922.904-9-fengzhimin1@huawei.com> (raw)
In-Reply-To: <20200109045922.904-1-fengzhimin1@huawei.com>

From: fengzhimin <fengzhimin1@huawei.com>

register memory for the multiRDMA channels and transmit the destination
keys to the source for use, including the virtual addresses.

Signed-off-by: fengzhimin <fengzhimin1@huawei.com>
---
 migration/rdma.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 190 insertions(+), 2 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 518a21b0fe..6ecc870844 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -3847,6 +3847,15 @@ static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data)
         return rdma_block_notification_handle(opaque, data);
 
     case RAM_CONTROL_HOOK:
+        if (migrate_use_multiRDMA()) {
+            int i;
+            int thread_count = migrate_multiRDMA_channels();
+            /* Inform dest recv_thread to poll */
+            for (i = 0; i < thread_count; i++) {
+                qemu_sem_post(&multiRDMA_recv_state->params[i].sem);
+            }
+        }
+
         return qemu_rdma_registration_handle(f, opaque);
 
     default:
@@ -3920,6 +3929,17 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
         head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
         trace_qemu_rdma_registration_stop_ram();
 
+        if (migrate_use_multiRDMA()) {
+            /*
+             * Inform the multiRDMA channels to register memory
+             */
+            int i;
+            int thread_count = migrate_multiRDMA_channels();
+            for (i = 0; i < thread_count; i++) {
+                qemu_sem_post(&multiRDMA_send_state->params[i].sem);
+            }
+        }
+
         /*
          * Make sure that we parallelize the pinning on both sides.
          * For very large guests, doing this serially takes a really
@@ -3985,6 +4005,15 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
     head.type = RDMA_CONTROL_REGISTER_FINISHED;
     ret = qemu_rdma_exchange_send(rdma, &head, NULL, NULL, NULL, NULL);
 
+    if (migrate_use_multiRDMA()) {
+        /* Inform src send_thread to send FINISHED signal */
+        int i;
+        int thread_count = migrate_multiRDMA_channels();
+        for (i = 0; i < thread_count; i++) {
+            qemu_sem_post(&multiRDMA_send_state->params[i].sem);
+        }
+    }
+
     if (ret < 0) {
         goto err;
     }
@@ -4150,18 +4179,119 @@ err:
     return msg;
 }
 
+static int qemu_multiRDMA_registration_handle(void *opaque)
+{
+    RDMAControlHeader blocks = { .type = RDMA_CONTROL_RAM_BLOCKS_RESULT,
+                                 .repeat = 1 };
+    MultiRDMARecvParams *p = opaque;
+    RDMAContext *rdma = p->rdma;
+    RDMALocalBlocks *local = &rdma->local_ram_blocks;
+    RDMAControlHeader head;
+    int ret = 0;
+    int i = 0;
+
+    CHECK_ERROR_STATE();
+
+    do {
+        ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_NONE);
+
+        if (ret < 0) {
+            break;
+        }
+
+        if (head.repeat > RDMA_CONTROL_MAX_COMMANDS_PER_MESSAGE) {
+            error_report("rdma: Too many requests in this message (%d)."
+                         "Bailing.", head.repeat);
+            ret = -EIO;
+            break;
+        }
+
+        switch (head.type) {
+        case RDMA_CONTROL_REGISTER_FINISHED:
+            goto out;
+        case RDMA_CONTROL_RAM_BLOCKS_REQUEST:
+            qsort(rdma->local_ram_blocks.block,
+                  rdma->local_ram_blocks.nb_blocks,
+                  sizeof(RDMALocalBlock), dest_ram_sort_func);
+
+            if (rdma->pin_all) {
+                ret = qemu_rdma_reg_whole_ram_blocks(rdma);
+                if (ret) {
+                    error_report("rdma migration: error dest "
+                                 "registering ram blocks");
+                    goto out;
+                }
+            }
+
+            for (i = 0; i < local->nb_blocks; i++) {
+                /*
+                 * The multiRDMA threads only register ram block
+                 * to send data, other blocks are sent by main RDMA thread.
+                 */
+                if (strcmp(local->block[i].block_name, "mach-virt.ram") == 0) {
+                    rdma->dest_blocks[i].remote_host_addr =
+                        (uintptr_t)(local->block[i].local_host_addr);
+
+                    if (rdma->pin_all) {
+                        rdma->dest_blocks[i].remote_rkey =
+                            local->block[i].mr->rkey;
+                    }
+
+                    rdma->dest_blocks[i].offset = local->block[i].offset;
+                    rdma->dest_blocks[i].length = local->block[i].length;
+
+                    dest_block_to_network(&rdma->dest_blocks[i]);
+
+                    break;
+                }
+            }
+
+            blocks.len = rdma->local_ram_blocks.nb_blocks
+                                                * sizeof(RDMADestBlock);
+
+            ret = qemu_rdma_post_send_control(rdma,
+                                              (uint8_t *) rdma->dest_blocks,
+                                              &blocks);
+
+            if (ret < 0) {
+                error_report("rdma migration: error sending remote info");
+                goto out;
+            }
+
+            break;
+        default:
+            error_report("Unknown control message %s", control_desc(head.type));
+            ret = -EIO;
+            goto out;
+        }
+    } while (1);
+out:
+    if (ret < 0) {
+        rdma->error_state = ret;
+    }
+    return ret;
+}
+
 static void *multiRDMA_recv_thread(void *opaque)
 {
     MultiRDMARecvParams *p = opaque;
+    int ret;
 
     while (true) {
+        qemu_sem_wait(&p->sem);
+
         qemu_mutex_lock(&p->mutex);
         if (p->quit) {
             qemu_mutex_unlock(&p->mutex);
             break;
         }
         qemu_mutex_unlock(&p->mutex);
-        qemu_sem_wait(&p->sem);
+
+        ret = qemu_multiRDMA_registration_handle(opaque);
+        if (ret < 0) {
+            qemu_file_set_error(p->file, ret);
+            break;
+        }
     }
 
     qemu_mutex_lock(&p->mutex);
@@ -4378,18 +4508,76 @@ static void migration_rdma_send_initial_packet(QEMUFile *f, uint8_t id)
 static void *multiRDMA_send_thread(void *opaque)
 {
     MultiRDMASendParams *p = opaque;
+    RDMAContext *rdma = p->rdma;
+    int ret;
 
     /* send the multiRDMA channels magic */
     migration_rdma_send_initial_packet(p->file, p->id);
 
+    /* wait for semaphore notification to register memory */
+    qemu_sem_wait(&p->sem);
+
+    RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT };
+    RDMALocalBlocks *local = &rdma->local_ram_blocks;
+    int reg_result_idx, i, nb_dest_blocks;
+    RDMAControlHeader head = { .len = 0, .repeat = 1 };
+
+    head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
+
+    ret = qemu_rdma_exchange_send(rdma, &head, NULL, &resp,
+            &reg_result_idx, rdma->pin_all ?
+            qemu_rdma_reg_whole_ram_blocks : NULL);
+    if (ret < 0) {
+        return NULL;
+    }
+
+    nb_dest_blocks = resp.len / sizeof(RDMADestBlock);
+
+    if (local->nb_blocks != nb_dest_blocks) {
+        rdma->error_state = -EINVAL;
+        return NULL;
+    }
+
+    qemu_rdma_move_header(rdma, reg_result_idx, &resp);
+    memcpy(rdma->dest_blocks,
+           rdma->wr_data[reg_result_idx].control_curr, resp.len);
+
+    for (i = 0; i < nb_dest_blocks; i++) {
+        /*
+         * The multiRDMA threads only register ram block
+         * to send data, other blocks are sent by main RDMA thread.
+         */
+        if (strcmp(local->block[i].block_name, "mach-virt.ram") == 0) {
+            network_to_dest_block(&rdma->dest_blocks[i]);
+
+            /* We require that the blocks are in the same order */
+            if (rdma->dest_blocks[i].length != local->block[i].length) {
+                rdma->error_state = -EINVAL;
+                return NULL;
+            }
+            local->block[i].remote_host_addr =
+                rdma->dest_blocks[i].remote_host_addr;
+            local->block[i].remote_rkey = rdma->dest_blocks[i].remote_rkey;
+            break;
+        }
+    }
+
     while (true) {
+        qemu_sem_wait(&p->sem);
+
         qemu_mutex_lock(&p->mutex);
         if (p->quit) {
             qemu_mutex_unlock(&p->mutex);
             break;
         }
         qemu_mutex_unlock(&p->mutex);
-        qemu_sem_wait(&p->sem);
+
+        /* Send FINISHED to the destination */
+        head.type = RDMA_CONTROL_REGISTER_FINISHED;
+        ret = qemu_rdma_exchange_send(rdma, &head, NULL, NULL, NULL, NULL);
+        if (ret < 0) {
+            return NULL;
+        }
     }
 
     qemu_mutex_lock(&p->mutex);
-- 
2.19.1




  parent reply	other threads:[~2020-01-09  9:58 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-09  4:59 [PATCH RFC 00/12] *** mulitple RDMA channels for migration *** Zhimin Feng
2020-01-09  4:59 ` [PATCH RFC 01/12] migration: Add multiRDMA capability support Zhimin Feng
2020-01-13 15:30   ` Markus Armbruster
2020-01-15  1:55     ` fengzhimin
2020-01-13 16:26   ` Eric Blake
2020-01-15  2:04     ` fengzhimin
2020-01-15 18:09   ` Dr. David Alan Gilbert
2020-01-16 13:18     ` Juan Quintela
2020-01-17  1:30       ` fengzhimin
2020-01-09  4:59 ` [PATCH RFC 02/12] migration: Export the 'migration_incoming_setup' function and add the 'migrate_use_rdma_pin_all' function Zhimin Feng
2020-01-15 18:57   ` Dr. David Alan Gilbert
2020-01-16 13:19   ` Juan Quintela
2020-01-09  4:59 ` [PATCH RFC 03/12] migration: Create the multi-rdma-channels parameter Zhimin Feng
2020-01-13 15:34   ` Markus Armbruster
2020-01-15  1:57     ` fengzhimin
2020-01-16 13:20   ` Juan Quintela
2020-01-09  4:59 ` [PATCH RFC 04/12] migration/rdma: Create multiRDMA migration threads Zhimin Feng
2020-01-16 13:25   ` Juan Quintela
2020-01-17  1:32     ` fengzhimin
2020-01-09  4:59 ` [PATCH RFC 05/12] migration/rdma: Create the multiRDMA channels Zhimin Feng
2020-01-15 19:54   ` Dr. David Alan Gilbert
2020-01-16 13:30   ` Juan Quintela
2020-01-09  4:59 ` [PATCH RFC 06/12] migration/rdma: Transmit initial package Zhimin Feng
2020-01-15 18:36   ` Dr. David Alan Gilbert
2020-01-09  4:59 ` [PATCH RFC 07/12] migration/rdma: Be sure all channels are created Zhimin Feng
2020-01-09  4:59 ` Zhimin Feng [this message]
2020-01-09  4:59 ` [PATCH RFC 09/12] migration/rdma: Wait for all multiRDMA to complete registration Zhimin Feng
2020-01-09  4:59 ` [PATCH RFC 10/12] migration/rdma: use multiRDMA to send RAM block for rdma-pin-all mode Zhimin Feng
2020-01-09  4:59 ` [PATCH RFC 11/12] migration/rdma: use multiRDMA to send RAM block for NOT " Zhimin Feng
2020-01-09  4:59 ` [PATCH RFC 12/12] migration/rdma: only register the virt-ram block for MultiRDMA Zhimin Feng
2020-01-17 18:52   ` Dr. David Alan Gilbert
2020-01-19  1:44     ` fengzhimin
2020-01-20  9:05       ` Dr. David Alan Gilbert
2020-01-21  1:30         ` fengzhimin
2020-01-09 10:38 ` [PATCH RFC 00/12] *** mulitple RDMA channels for migration *** no-reply
2020-01-15 19:57 ` Dr. David Alan Gilbert
2020-01-16  1:37   ` fengzhimin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200109045922.904-9-fengzhimin1@huawei.com \
    --to=fengzhimin1@huawei.com \
    --cc=armbru@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=eblake@redhat.com \
    --cc=jemmy858585@gmail.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=zhang.zhanghailiang@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.