* Re: [PATCH v2] migration/rdma: Use huge pages to register VM memory
@ 2021-06-16 13:02 LIZHAOXIN1 [李照鑫]
From: LIZHAOXIN1 [李照鑫] @ 2021-06-16 13:02 UTC
  To: qemu-devel, quintela, dgilbert, berrange
  Cc: LIZHAOXIN1 [李照鑫]

> When using libvirt for RDMA live migration, if the VM memory is too
> large, deregistering it at the source side takes a long time, resulting
> in a long downtime (for a 64G VM, deregistration takes about 400ms).
>
> Although the VM's memory uses 2M huge pages, the MLNX driver still pins
> (and unpins) the memory in 4K pages. So we use huge pages to skip the
> pinning and unpinning of memory and reduce the downtime.
> 

The test environment:
kernel: linux-5.12
MLNX: ConnectX-4 LX
libvirt command:
virsh migrate --live --p2p --persistent --copy-storage-inc --listen-address \
0.0.0.0 --rdma-pin-all --migrateuri rdma://192.168.0.2 [VM] qemu+tcp://192.168.0.2/system

> ---
> v2
> - Add page_size to struct RDMALocalBlock
> - Use page_size to determine whether the VM uses huge pages
> ---
> 
> Signed-off-by: lizhaoxin <lizhaoxin1@kingsoft.com>
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 1cdb4561f3..703816ebc7 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -215,6 +215,7 @@ typedef struct RDMALocalBlock {
>      uint64_t       remote_host_addr; /* remote virtual address */
>      uint64_t       offset;
>      uint64_t       length;
> +    uint64_t       page_size;
>      struct         ibv_mr **pmr;    /* MRs for chunk-level registration */
>      struct         ibv_mr *mr;      /* MR for non-chunk-level registration */
>      uint32_t      *remote_keys;     /* rkeys for chunk-level registration */
> @@ -565,7 +566,8 @@ static inline uint8_t *ram_chunk_end(const RDMALocalBlock *rdma_ram_block,
> 
>  static int rdma_add_block(RDMAContext *rdma, const char *block_name,
>                           void *host_addr,
> -                         ram_addr_t block_offset, uint64_t length)
> +                         ram_addr_t block_offset, uint64_t length,
> +                         uint64_t page_size)
>  {
>      RDMALocalBlocks *local = &rdma->local_ram_blocks;
>      RDMALocalBlock *block;
> @@ -595,6 +597,7 @@ static int rdma_add_block(RDMAContext *rdma, const char *block_name,
>      block->local_host_addr = host_addr;
>      block->offset = block_offset;
>      block->length = length;
> +    block->page_size = page_size;
>      block->index = local->nb_blocks;
>      block->src_index = ~0U; /* Filled in by the receipt of the block list */
>      block->nb_chunks = ram_chunk_index(host_addr, host_addr + length) + 1UL;
> @@ -634,7 +637,8 @@ static int qemu_rdma_init_one_block(RAMBlock *rb, void *opaque)
>      void *host_addr = qemu_ram_get_host_addr(rb);
>      ram_addr_t block_offset = qemu_ram_get_offset(rb);
>      ram_addr_t length = qemu_ram_get_used_length(rb);
> -    return rdma_add_block(opaque, block_name, host_addr, block_offset, length);
> +    ram_addr_t page_size = qemu_ram_pagesize(rb);
> +    return rdma_add_block(opaque, block_name, host_addr, block_offset, length, page_size);
>  }
> 
>  /*
> @@ -1123,13 +1127,25 @@ static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma)
>      RDMALocalBlocks *local = &rdma->local_ram_blocks;
> 
>      for (i = 0; i < local->nb_blocks; i++) {
> -        local->block[i].mr =
> -            ibv_reg_mr(rdma->pd,
> -                    local->block[i].local_host_addr,
> -                    local->block[i].length,
> -                    IBV_ACCESS_LOCAL_WRITE |
> -                    IBV_ACCESS_REMOTE_WRITE
> -                    );
> +        if (local->block[i].page_size != qemu_real_host_page_size) {
> +            local->block[i].mr =
> +                ibv_reg_mr(rdma->pd,
> +                        local->block[i].local_host_addr,
> +                        local->block[i].length,
> +                        IBV_ACCESS_LOCAL_WRITE |
> +                        IBV_ACCESS_REMOTE_WRITE |
> +                        IBV_ACCESS_ON_DEMAND |
> +                        IBV_ACCESS_HUGETLB
> +                        );
> +        } else {
> +            local->block[i].mr =
> +                ibv_reg_mr(rdma->pd,
> +                        local->block[i].local_host_addr,
> +                        local->block[i].length,
> +                        IBV_ACCESS_LOCAL_WRITE |
> +                        IBV_ACCESS_REMOTE_WRITE
> +                        );
> +        }
>          if (!local->block[i].mr) {
>              perror("Failed to register local dest ram block!\n");
>              break;

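Before IBV_ACCESS_ON_DEMAND is relied on unconditionally for huge-page
blocks, it is worth confirming that the device actually reports ODP
support. A minimal standalone probe, separate from the patch (the file
name, the use of the first device in the list, and the error handling
are just for illustration), could look like this:

/* odp_probe.c: report whether the first RDMA device advertises ODP.
 * Build: gcc odp_probe.c -o odp_probe -libverbs
 * Standalone illustration only, not part of the patch.
 */
#include <stdio.h>
#include <infiniband/verbs.h>

int main(void)
{
    int num;
    struct ibv_device **devs = ibv_get_device_list(&num);
    if (!devs || num == 0) {
        fprintf(stderr, "no RDMA devices found\n");
        return 1;
    }

    struct ibv_context *ctx = ibv_open_device(devs[0]);
    if (!ctx) {
        fprintf(stderr, "failed to open %s\n", ibv_get_device_name(devs[0]));
        return 1;
    }

    struct ibv_device_attr_ex attr;
    if (ibv_query_device_ex(ctx, NULL, &attr)) {
        fprintf(stderr, "ibv_query_device_ex failed\n");
        return 1;
    }

    printf("%s: ODP %ssupported\n", ibv_get_device_name(devs[0]),
           (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) ? "" : "NOT ");

    ibv_close_device(ctx);
    ibv_free_device_list(devs);
    return 0;
}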
Hello everyone, please give me some advice.

Thanks.
lizhaoxin1


* [PATCH v2] migration/rdma: Use huge pages to register VM memory
@ 2021-06-10 15:39 LIZHAOXIN1 [李照鑫]
From: LIZHAOXIN1 [李照鑫] @ 2021-06-10 15:39 UTC
  To: qemu-devel, quintela, dgilbert
  Cc: LIZHAOXIN1 [李照鑫], sunhao2 [孙昊],
	DENGLINWEN [邓林文],
	YANGFENG1 [杨峰]

When using libvirt for RDMA live migration, if the VM memory is too
large, deregistering it at the source side takes a long time, resulting
in a long downtime (for a 64G VM, deregistration takes about 400ms).

Although the VM's memory uses 2M huge pages, the MLNX driver still pins
(and unpins) the memory in 4K pages. So we use huge pages to skip the
pinning and unpinning of memory and reduce the downtime.
    
---
v2
- Add page_size to struct RDMALocalBlock
- Use page_size to determine whether the VM uses huge pages
---
    
Signed-off-by: lizhaoxin <lizhaoxin1@kingsoft.com>

diff --git a/migration/rdma.c b/migration/rdma.c
index 1cdb4561f3..703816ebc7 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -215,6 +215,7 @@ typedef struct RDMALocalBlock {
     uint64_t       remote_host_addr; /* remote virtual address */
     uint64_t       offset;
     uint64_t       length;
+    uint64_t       page_size;
     struct         ibv_mr **pmr;    /* MRs for chunk-level registration */
     struct         ibv_mr *mr;      /* MR for non-chunk-level registration */
     uint32_t      *remote_keys;     /* rkeys for chunk-level registration */
@@ -565,7 +566,8 @@ static inline uint8_t *ram_chunk_end(const RDMALocalBlock *rdma_ram_block,
 
 static int rdma_add_block(RDMAContext *rdma, const char *block_name,
                          void *host_addr,
-                         ram_addr_t block_offset, uint64_t length)
+                         ram_addr_t block_offset, uint64_t length,
+                         uint64_t page_size)
 {
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
     RDMALocalBlock *block;
@@ -595,6 +597,7 @@ static int rdma_add_block(RDMAContext *rdma, const char *block_name,
     block->local_host_addr = host_addr;
     block->offset = block_offset;
     block->length = length;
+    block->page_size = page_size;
     block->index = local->nb_blocks;
     block->src_index = ~0U; /* Filled in by the receipt of the block list */
     block->nb_chunks = ram_chunk_index(host_addr, host_addr + length) + 1UL;
@@ -634,7 +637,8 @@ static int qemu_rdma_init_one_block(RAMBlock *rb, void *opaque)
     void *host_addr = qemu_ram_get_host_addr(rb);
     ram_addr_t block_offset = qemu_ram_get_offset(rb);
     ram_addr_t length = qemu_ram_get_used_length(rb);
-    return rdma_add_block(opaque, block_name, host_addr, block_offset, length);
+    ram_addr_t page_size = qemu_ram_pagesize(rb);
+    return rdma_add_block(opaque, block_name, host_addr, block_offset, length, page_size);
 }
 
 /*
@@ -1123,13 +1127,25 @@ static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma)
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
 
     for (i = 0; i < local->nb_blocks; i++) {
-        local->block[i].mr =
-            ibv_reg_mr(rdma->pd,
-                    local->block[i].local_host_addr,
-                    local->block[i].length,
-                    IBV_ACCESS_LOCAL_WRITE |
-                    IBV_ACCESS_REMOTE_WRITE
-                    );
+        if (local->block[i].page_size != qemu_real_host_page_size) {
+            local->block[i].mr =
+                ibv_reg_mr(rdma->pd,
+                        local->block[i].local_host_addr,
+                        local->block[i].length,
+                        IBV_ACCESS_LOCAL_WRITE |
+                        IBV_ACCESS_REMOTE_WRITE |
+                        IBV_ACCESS_ON_DEMAND |
+                        IBV_ACCESS_HUGETLB
+                        );
+        } else {
+            local->block[i].mr =
+                ibv_reg_mr(rdma->pd,
+                        local->block[i].local_host_addr,
+                        local->block[i].length,
+                        IBV_ACCESS_LOCAL_WRITE |
+                        IBV_ACCESS_REMOTE_WRITE
+                        );
+        }
         if (!local->block[i].mr) {
             perror("Failed to register local dest ram block!\n");
             break;

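For anyone who wants to try the flag combination outside QEMU, below is
a self-contained sketch that registers an anonymous 2M huge-page buffer
the same way this patch registers huge-page-backed RAM blocks. It is
only an illustration, not part of the patch: the file name is made up,
device [0] is picked arbitrarily, and it assumes the host has free 2M
hugepages and a device that accepts ODP registrations with
IBV_ACCESS_HUGETLB.

/* hugemr.c: register a 2M-hugepage buffer with the access flags the
 * patch uses for huge-page-backed blocks (LOCAL_WRITE | REMOTE_WRITE |
 * ON_DEMAND | HUGETLB).
 * Build: gcc hugemr.c -o hugemr -libverbs
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <infiniband/verbs.h>

#define LEN (2UL * 1024 * 1024)   /* one 2M huge page */

int main(void)
{
    int num;
    struct ibv_device **devs = ibv_get_device_list(&num);
    if (!devs || num == 0) {
        fprintf(stderr, "no RDMA devices found\n");
        return 1;
    }

    struct ibv_context *ctx = ibv_open_device(devs[0]);
    struct ibv_pd *pd = ctx ? ibv_alloc_pd(ctx) : NULL;
    if (!pd) {
        fprintf(stderr, "failed to open device / allocate PD\n");
        return 1;
    }

    /* Huge-page-backed buffer, standing in for a 2M-backed RAMBlock. */
    void *buf = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
    if (buf == MAP_FAILED) {
        perror("mmap(MAP_HUGETLB)");
        return 1;
    }

    /* Same access flags the patch adds for huge-page-backed blocks. */
    struct ibv_mr *mr = ibv_reg_mr(pd, buf, LEN,
                                   IBV_ACCESS_LOCAL_WRITE |
                                   IBV_ACCESS_REMOTE_WRITE |
                                   IBV_ACCESS_ON_DEMAND |
                                   IBV_ACCESS_HUGETLB);
    if (!mr) {
        perror("ibv_reg_mr");
        return 1;
    }
    printf("registered %lu bytes, lkey=0x%x\n", LEN, mr->lkey);

    ibv_dereg_mr(mr);
    munmap(buf, LEN);
    ibv_dealloc_pd(pd);
    ibv_close_device(ctx);
    ibv_free_device_list(devs);
    return 0;
}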
