All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/4] migration/rdma: cleanup rmda in rdma_start_incoming_migration error path
@ 2021-05-25  8:05 Li Zhijian
  2021-05-25  8:05 ` [PATCH v2 2/4] migration/rdma: Fix rdma_addrinfo res leaks Li Zhijian
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Li Zhijian @ 2021-05-25  8:05 UTC (permalink / raw)
  To: quintela, dgilbert; +Cc: qemu-devel, Li Zhijian

The error path after calling qemu_rdma_dest_init() should do RDMA cleanup.

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 migration/rdma.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 41726cc74a8..7e7595faabf 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -4040,7 +4040,7 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp)
 
     if (ret) {
         ERROR(errp, "listening on socket!");
-        goto err;
+        goto cleanup_rdma;
     }
 
     trace_rdma_start_incoming_migration_after_rdma_listen();
@@ -4050,7 +4050,7 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp)
         rdma_return_path = qemu_rdma_data_init(host_port, &local_err);
 
         if (rdma_return_path == NULL) {
-            goto err;
+            goto cleanup_rdma;
         }
 
         qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
@@ -4059,6 +4059,9 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp)
     qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
                         NULL, (void *)(intptr_t)rdma);
     return;
+
+cleanup_rdma:
+    qemu_rdma_cleanup(rdma);
 err:
     error_propagate(errp, local_err);
     if (rdma) {
-- 
2.30.2





^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 2/4] migration/rdma: Fix rdma_addrinfo res leaks
  2021-05-25  8:05 [PATCH v2 1/4] migration/rdma: cleanup rmda in rdma_start_incoming_migration error path Li Zhijian
@ 2021-05-25  8:05 ` Li Zhijian
  2021-05-25  8:05 ` [PATCH v2 3/4] migration/rdma: destination: create the return patch after the first accept Li Zhijian
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Li Zhijian @ 2021-05-25  8:05 UTC (permalink / raw)
  To: quintela, dgilbert; +Cc: qemu-devel, Li Zhijian

rdma_freeaddrinfo() is the reverse operation of rdma_getaddrinfo()

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 migration/rdma.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/migration/rdma.c b/migration/rdma.c
index 7e7595faabf..651534e8255 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -987,10 +987,12 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
         }
     }
 
+    rdma_freeaddrinfo(res);
     ERROR(errp, "could not resolve address %s", rdma->host);
     goto err_resolve_get_addr;
 
 route:
+    rdma_freeaddrinfo(res);
     qemu_rdma_dump_gid("source_resolve_addr", rdma->cm_id);
 
     ret = rdma_get_cm_event(rdma->channel, &cm_event);
@@ -2593,6 +2595,7 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
         break;
     }
 
+    rdma_freeaddrinfo(res);
     if (!e) {
         ERROR(errp, "Error: could not rdma_bind_addr!");
         goto err_dest_init_bind_addr;
-- 
2.30.2





^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 3/4] migration/rdma: destination: create the return patch after the first accept
  2021-05-25  8:05 [PATCH v2 1/4] migration/rdma: cleanup rmda in rdma_start_incoming_migration error path Li Zhijian
  2021-05-25  8:05 ` [PATCH v2 2/4] migration/rdma: Fix rdma_addrinfo res leaks Li Zhijian
@ 2021-05-25  8:05 ` Li Zhijian
  2021-05-25  9:40   ` Dr. David Alan Gilbert
  2021-05-25  8:05 ` [PATCH v2 4/4] migration/rdma: source: poll cm_event from return path Li Zhijian
  2021-05-26 11:30 ` [PATCH v2 1/4] migration/rdma: cleanup rmda in rdma_start_incoming_migration error path Dr. David Alan Gilbert
  3 siblings, 1 reply; 7+ messages in thread
From: Li Zhijian @ 2021-05-25  8:05 UTC (permalink / raw)
  To: quintela, dgilbert; +Cc: qemu-devel, Li Zhijian

destination side:
$ build/qemu-system-x86_64 -enable-kvm -netdev tap,id=hn0,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown -device e1000,netdev=hn0,mac=50:52:54:00:11:22 -boot c -drive if=none,file=./Fedora-rdma-server-migration.qcow2,id=drive-virtio-disk0 -device virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 -m 2048 -smp 2 -device piix3-usb-uhci -device usb-tablet -monitor stdio -vga qxl -spice streaming-video=filter,port=5902,disable-ticketing -incoming rdma:192.168.1.10:8888
(qemu) migrate_set_capability postcopy-ram on
(qemu)
dest_init RDMA Device opened: kernel name rocep1s0f0 uverbs device name uverbs0, infiniband_verbs class device path /sys/class/infiniband_verbs/uverbs0, infiniband class device path /sys/class/infiniband/rocep1s0f0, transport: (2) Ethernet
Segmentation fault (core dumped)

 (gdb) bt
 #0  qemu_rdma_accept (rdma=0x0) at ../migration/rdma.c:3272
 #1  rdma_accept_incoming_migration (opaque=0x0) at     ../migration/rdma.c:3986
 #2  0x0000563c9e51f02a in aio_dispatch_handler
     (ctx=ctx@entry=0x563ca0606010, node=0x563ca12b2150) at ../util/aio-posix.c:329
 #3  0x0000563c9e51f752 in aio_dispatch_handlers (ctx=0x563ca0606010) at      ../util/aio-posix.c:372
 #4  aio_dispatch (ctx=0x563ca0606010) at ../util/aio-posix.c:382
 #5  0x0000563c9e4f4d9e in aio_ctx_dispatch (source=<optimized out>,      callback=<optimized out>, user_data=<optimized out>)    at ../util/async.c:306
 #6  0x00007fe96ef3fa9f in g_main_context_dispatch () at      /lib64/libglib-2.0.so.0
 #7  0x0000563c9e4ffeb8 in glib_pollfds_poll () at     ../util/main-loop.c:231
 #8  os_host_main_loop_wait (timeout=12188789) at     ../util/main-loop.c:254
 #9  main_loop_wait (nonblocking=nonblocking@entry=0) at     ../util/main-loop.c:530
 #10 0x0000563c9e3c7211 in qemu_main_loop () at     ../softmmu/runstate.c:725
 #11 0x0000563c9dfd46fe in main (argc=<optimized out>, argv=<optimized     out>, envp=<optimized out>) at ../softmmu/main.c:50

The rdma return path will not be created when qemu incoming is starting,
since migrate_postcopy() is false at that moment; a NULL return path
rdma was then referenced if the user enabled postcopy later.

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
---
V2: alloc memory for host_port
---
 migration/rdma.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 651534e8255..d829d08d076 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -316,6 +316,7 @@ typedef struct RDMALocalBlocks {
 typedef struct RDMAContext {
     char *host;
     int port;
+    char *host_port;
 
     RDMAWorkRequestData wr_data[RDMA_WRID_MAX];
 
@@ -2392,7 +2393,9 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
         rdma->channel = NULL;
     }
     g_free(rdma->host);
+    g_free(rdma->host_port);
     rdma->host = NULL;
+    rdma->host_port = NULL;
 }
 
 
@@ -2648,6 +2651,7 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp)
         if (!inet_parse(addr, host_port, NULL)) {
             rdma->port = atoi(addr->port);
             rdma->host = g_strdup(addr->host);
+            rdma->host_port = g_strdup(host_port);
         } else {
             ERROR(errp, "bad RDMA migration address '%s'", host_port);
             g_free(rdma);
@@ -3276,6 +3280,7 @@ static int qemu_rdma_accept(RDMAContext *rdma)
                                             .private_data = &cap,
                                             .private_data_len = sizeof(cap),
                                          };
+    RDMAContext *rdma_return_path = NULL;
     struct rdma_cm_event *cm_event;
     struct ibv_context *verbs;
     int ret = -EINVAL;
@@ -3291,6 +3296,20 @@ static int qemu_rdma_accept(RDMAContext *rdma)
         goto err_rdma_dest_wait;
     }
 
+    /*
+     * initialize the RDMAContext for return path for postcopy after first
+     * connection request reached.
+     */
+    if (migrate_postcopy() && !rdma->is_return_path) {
+        rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL);
+        if (rdma_return_path == NULL) {
+            rdma_ack_cm_event(cm_event);
+            goto err_rdma_dest_wait;
+        }
+
+        qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
+    }
+
     memcpy(&cap, cm_event->param.conn.private_data, sizeof(cap));
 
     network_to_caps(&cap);
@@ -3406,6 +3425,7 @@ static int qemu_rdma_accept(RDMAContext *rdma)
 err_rdma_dest_wait:
     rdma->error_state = ret;
     qemu_rdma_cleanup(rdma);
+    g_free(rdma_return_path);
     return ret;
 }
 
@@ -4048,17 +4068,6 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp)
 
     trace_rdma_start_incoming_migration_after_rdma_listen();
 
-    /* initialize the RDMAContext for return path */
-    if (migrate_postcopy()) {
-        rdma_return_path = qemu_rdma_data_init(host_port, &local_err);
-
-        if (rdma_return_path == NULL) {
-            goto cleanup_rdma;
-        }
-
-        qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
-    }
-
     qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
                         NULL, (void *)(intptr_t)rdma);
     return;
@@ -4069,6 +4078,7 @@ err:
     error_propagate(errp, local_err);
     if (rdma) {
         g_free(rdma->host);
+        g_free(rdma->host_port);
     }
     g_free(rdma);
     g_free(rdma_return_path);
-- 
2.30.2





^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 4/4] migration/rdma: source: poll cm_event from return path
  2021-05-25  8:05 [PATCH v2 1/4] migration/rdma: cleanup rmda in rdma_start_incoming_migration error path Li Zhijian
  2021-05-25  8:05 ` [PATCH v2 2/4] migration/rdma: Fix rdma_addrinfo res leaks Li Zhijian
  2021-05-25  8:05 ` [PATCH v2 3/4] migration/rdma: destination: create the return patch after the first accept Li Zhijian
@ 2021-05-25  8:05 ` Li Zhijian
  2021-05-25 10:26   ` Dr. David Alan Gilbert
  2021-05-26 11:30 ` [PATCH v2 1/4] migration/rdma: cleanup rmda in rdma_start_incoming_migration error path Dr. David Alan Gilbert
  3 siblings, 1 reply; 7+ messages in thread
From: Li Zhijian @ 2021-05-25  8:05 UTC (permalink / raw)
  To: quintela, dgilbert; +Cc: qemu-devel, Li Zhijian

The source side always blocks if postcopy is only enabled at the source side;
users are not able to cancel this migration in that case.

Let the source side have a chance to cancel this migration.

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
---
V2: utilize poll to check cm event
---
 migration/rdma.c | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index d829d08d076..f67e21b4f54 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -36,6 +36,7 @@
 #include <rdma/rdma_cma.h>
 #include "trace.h"
 #include "qom/object.h"
+#include <poll.h>
 
 /*
  * Print and error on both the Monitor and the Log file.
@@ -2460,7 +2461,36 @@ err_rdma_source_init:
     return -1;
 }
 
-static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
+static int qemu_get_cm_event_timeout(RDMAContext *rdma,
+                                     struct rdma_cm_event **cm_event,
+                                     long msec, Error **errp)
+{
+    int ret;
+    struct pollfd poll_fd = {
+                                .fd = rdma->channel->fd,
+                                .events = POLLIN,
+                                .revents = 0
+                            };
+
+    do {
+        ret = poll(&poll_fd, 1, msec);
+    } while (ret < 0 && errno == EINTR);
+
+    if (ret == 0) {
+        ERROR(errp, "poll cm event timeout");
+        return -1;
+    } else if (ret < 0) {
+        ERROR(errp, "failed to pull cm event, errno=%i", errno);
+        return -1;
+    } else if (poll_fd.revents & POLLIN) {
+        return rdma_get_cm_event(rdma->channel, cm_event);
+    } else {
+        ERROR(errp, "no POLLIN event, revent=%x", poll_fd.revents);
+        return -1;
+    }
+}
+
+static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path)
 {
     RDMACapabilities cap = {
                                 .version = RDMA_CONTROL_VERSION_CURRENT,
@@ -2498,7 +2528,11 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
         goto err_rdma_source_connect;
     }
 
-    ret = rdma_get_cm_event(rdma->channel, &cm_event);
+    if (return_path) {
+        ret = qemu_get_cm_event_timeout(rdma, &cm_event, 5000, errp);
+    } else {
+        ret = rdma_get_cm_event(rdma->channel, &cm_event);
+    }
     if (ret) {
         perror("rdma_get_cm_event after rdma_connect");
         ERROR(errp, "connecting to destination!");
@@ -4111,7 +4145,7 @@ void rdma_start_outgoing_migration(void *opaque,
     }
 
     trace_rdma_start_outgoing_migration_after_rdma_source_init();
-    ret = qemu_rdma_connect(rdma, errp);
+    ret = qemu_rdma_connect(rdma, errp, false);
 
     if (ret) {
         goto err;
@@ -4132,7 +4166,7 @@ void rdma_start_outgoing_migration(void *opaque,
             goto return_path_err;
         }
 
-        ret = qemu_rdma_connect(rdma_return_path, errp);
+        ret = qemu_rdma_connect(rdma_return_path, errp, true);
 
         if (ret) {
             goto return_path_err;
-- 
2.30.2





^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 3/4] migration/rdma: destination: create the return patch after the first accept
  2021-05-25  8:05 ` [PATCH v2 3/4] migration/rdma: destination: create the return patch after the first accept Li Zhijian
@ 2021-05-25  9:40   ` Dr. David Alan Gilbert
  0 siblings, 0 replies; 7+ messages in thread
From: Dr. David Alan Gilbert @ 2021-05-25  9:40 UTC (permalink / raw)
  To: Li Zhijian; +Cc: qemu-devel, quintela

* Li Zhijian (lizhijian@cn.fujitsu.com) wrote:
> destination side:
> $ build/qemu-system-x86_64 -enable-kvm -netdev tap,id=hn0,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown -device e1000,netdev=hn0,mac=50:52:54:00:11:22 -boot c -drive if=none,file=./Fedora-rdma-server-migration.qcow2,id=drive-virtio-disk0 -device virtio-blk-pci,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 -m 2048 -smp 2 -device piix3-usb-uhci -device usb-tablet -monitor stdio -vga qxl -spice streaming-video=filter,port=5902,disable-ticketing -incoming rdma:192.168.1.10:8888
> (qemu) migrate_set_capability postcopy-ram on
> (qemu)
> dest_init RDMA Device opened: kernel name rocep1s0f0 uverbs device name uverbs0, infiniband_verbs class device path /sys/class/infiniband_verbs/uverbs0, infiniband class device path /sys/class/infiniband/rocep1s0f0, transport: (2) Ethernet
> Segmentation fault (core dumped)
> 
>  (gdb) bt
>  #0  qemu_rdma_accept (rdma=0x0) at ../migration/rdma.c:3272
>  #1  rdma_accept_incoming_migration (opaque=0x0) at     ../migration/rdma.c:3986
>  #2  0x0000563c9e51f02a in aio_dispatch_handler
>      (ctx=ctx@entry=0x563ca0606010, node=0x563ca12b2150) at ../util/aio-posix.c:329
>  #3  0x0000563c9e51f752 in aio_dispatch_handlers (ctx=0x563ca0606010) at      ../util/aio-posix.c:372
>  #4  aio_dispatch (ctx=0x563ca0606010) at ../util/aio-posix.c:382
>  #5  0x0000563c9e4f4d9e in aio_ctx_dispatch (source=<optimized out>,      callback=<optimized out>, user_data=<optimized out>)    at ../util/async.c:306
>  #6  0x00007fe96ef3fa9f in g_main_context_dispatch () at      /lib64/libglib-2.0.so.0
>  #7  0x0000563c9e4ffeb8 in glib_pollfds_poll () at     ../util/main-loop.c:231
>  #8  os_host_main_loop_wait (timeout=12188789) at     ../util/main-loop.c:254
>  #9  main_loop_wait (nonblocking=nonblocking@entry=0) at     ../util/main-loop.c:530
>  #10 0x0000563c9e3c7211 in qemu_main_loop () at     ../softmmu/runstate.c:725
>  #11 0x0000563c9dfd46fe in main (argc=<optimized out>, argv=<optimized     out>, envp=<optimized out>) at ../softmmu/main.c:50
> 
> The rdma return path will not be created when qemu incoming is starting,
> since migrate_postcopy() is false at that moment; a NULL return path
> rdma was then referenced if the user enabled postcopy later.
> 
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

> ---
> V2: alloc memory for host_port
> ---
>  migration/rdma.c | 32 +++++++++++++++++++++-----------
>  1 file changed, 21 insertions(+), 11 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 651534e8255..d829d08d076 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -316,6 +316,7 @@ typedef struct RDMALocalBlocks {
>  typedef struct RDMAContext {
>      char *host;
>      int port;
> +    char *host_port;
>  
>      RDMAWorkRequestData wr_data[RDMA_WRID_MAX];
>  
> @@ -2392,7 +2393,9 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
>          rdma->channel = NULL;
>      }
>      g_free(rdma->host);
> +    g_free(rdma->host_port);
>      rdma->host = NULL;
> +    rdma->host_port = NULL;
>  }
>  
>  
> @@ -2648,6 +2651,7 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp)
>          if (!inet_parse(addr, host_port, NULL)) {
>              rdma->port = atoi(addr->port);
>              rdma->host = g_strdup(addr->host);
> +            rdma->host_port = g_strdup(host_port);
>          } else {
>              ERROR(errp, "bad RDMA migration address '%s'", host_port);
>              g_free(rdma);
> @@ -3276,6 +3280,7 @@ static int qemu_rdma_accept(RDMAContext *rdma)
>                                              .private_data = &cap,
>                                              .private_data_len = sizeof(cap),
>                                           };
> +    RDMAContext *rdma_return_path = NULL;
>      struct rdma_cm_event *cm_event;
>      struct ibv_context *verbs;
>      int ret = -EINVAL;
> @@ -3291,6 +3296,20 @@ static int qemu_rdma_accept(RDMAContext *rdma)
>          goto err_rdma_dest_wait;
>      }
>  
> +    /*
> +     * initialize the RDMAContext for return path for postcopy after first
> +     * connection request reached.
> +     */
> +    if (migrate_postcopy() && !rdma->is_return_path) {
> +        rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL);
> +        if (rdma_return_path == NULL) {
> +            rdma_ack_cm_event(cm_event);
> +            goto err_rdma_dest_wait;
> +        }
> +
> +        qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
> +    }
> +
>      memcpy(&cap, cm_event->param.conn.private_data, sizeof(cap));
>  
>      network_to_caps(&cap);
> @@ -3406,6 +3425,7 @@ static int qemu_rdma_accept(RDMAContext *rdma)
>  err_rdma_dest_wait:
>      rdma->error_state = ret;
>      qemu_rdma_cleanup(rdma);
> +    g_free(rdma_return_path);
>      return ret;
>  }
>  
> @@ -4048,17 +4068,6 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp)
>  
>      trace_rdma_start_incoming_migration_after_rdma_listen();
>  
> -    /* initialize the RDMAContext for return path */
> -    if (migrate_postcopy()) {
> -        rdma_return_path = qemu_rdma_data_init(host_port, &local_err);
> -
> -        if (rdma_return_path == NULL) {
> -            goto cleanup_rdma;
> -        }
> -
> -        qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
> -    }
> -
>      qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
>                          NULL, (void *)(intptr_t)rdma);
>      return;
> @@ -4069,6 +4078,7 @@ err:
>      error_propagate(errp, local_err);
>      if (rdma) {
>          g_free(rdma->host);
> +        g_free(rdma->host_port);
>      }
>      g_free(rdma);
>      g_free(rdma_return_path);
> -- 
> 2.30.2
> 
> 
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 4/4] migration/rdma: source: poll cm_event from return path
  2021-05-25  8:05 ` [PATCH v2 4/4] migration/rdma: source: poll cm_event from return path Li Zhijian
@ 2021-05-25 10:26   ` Dr. David Alan Gilbert
  0 siblings, 0 replies; 7+ messages in thread
From: Dr. David Alan Gilbert @ 2021-05-25 10:26 UTC (permalink / raw)
  To: Li Zhijian; +Cc: qemu-devel, quintela

* Li Zhijian (lizhijian@cn.fujitsu.com) wrote:
> The source side always blocks if postcopy is only enabled at the source side;
> users are not able to cancel this migration in that case.
> 
> Let the source side have a chance to cancel this migration.
> 
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> ---
> V2: utilize poll to check cm event
> ---
>  migration/rdma.c | 42 ++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 38 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index d829d08d076..f67e21b4f54 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -36,6 +36,7 @@
>  #include <rdma/rdma_cma.h>
>  #include "trace.h"
>  #include "qom/object.h"
> +#include <poll.h>
>  
>  /*
>   * Print and error on both the Monitor and the Log file.
> @@ -2460,7 +2461,36 @@ err_rdma_source_init:
>      return -1;
>  }
>  
> -static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
> +static int qemu_get_cm_event_timeout(RDMAContext *rdma,
> +                                     struct rdma_cm_event **cm_event,
> +                                     long msec, Error **errp)
> +{
> +    int ret;
> +    struct pollfd poll_fd = {
> +                                .fd = rdma->channel->fd,
> +                                .events = POLLIN,
> +                                .revents = 0
> +                            };
> +
> +    do {
> +        ret = poll(&poll_fd, 1, msec);
> +    } while (ret < 0 && errno == EINTR);
> +
> +    if (ret == 0) {
> +        ERROR(errp, "poll cm event timeout");
> +        return -1;
> +    } else if (ret < 0) {
> +        ERROR(errp, "failed to pull cm event, errno=%i", errno);

Typo: 'poll' - I can fix that.

> +        return -1;
> +    } else if (poll_fd.revents & POLLIN) {
> +        return rdma_get_cm_event(rdma->channel, cm_event);
> +    } else {
> +        ERROR(errp, "no POLLIN event, revent=%x", poll_fd.revents);
> +        return -1;
> +    }
> +}
> +
> +static int qemu_rdma_connect(RDMAContext *rdma, Error **errp, bool return_path)
>  {
>      RDMACapabilities cap = {
>                                  .version = RDMA_CONTROL_VERSION_CURRENT,
> @@ -2498,7 +2528,11 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
>          goto err_rdma_source_connect;
>      }
>  
> -    ret = rdma_get_cm_event(rdma->channel, &cm_event);
> +    if (return_path) {
> +        ret = qemu_get_cm_event_timeout(rdma, &cm_event, 5000, errp);

Fixed timeouts are not a great fix; but I can't think of anything
better; the only alternative would be to register the fd on the main
thread's poll and get it to be called back when the event happened.

But for now;

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

> +    } else {
> +        ret = rdma_get_cm_event(rdma->channel, &cm_event);
> +    }
>      if (ret) {
>          perror("rdma_get_cm_event after rdma_connect");
>          ERROR(errp, "connecting to destination!");
> @@ -4111,7 +4145,7 @@ void rdma_start_outgoing_migration(void *opaque,
>      }
>  
>      trace_rdma_start_outgoing_migration_after_rdma_source_init();
> -    ret = qemu_rdma_connect(rdma, errp);
> +    ret = qemu_rdma_connect(rdma, errp, false);
>  
>      if (ret) {
>          goto err;
> @@ -4132,7 +4166,7 @@ void rdma_start_outgoing_migration(void *opaque,
>              goto return_path_err;
>          }
>  
> -        ret = qemu_rdma_connect(rdma_return_path, errp);
> +        ret = qemu_rdma_connect(rdma_return_path, errp, true);
>  
>          if (ret) {
>              goto return_path_err;
> -- 
> 2.30.2
> 
> 
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/4] migration/rdma: cleanup rmda in rdma_start_incoming_migration error path
  2021-05-25  8:05 [PATCH v2 1/4] migration/rdma: cleanup rmda in rdma_start_incoming_migration error path Li Zhijian
                   ` (2 preceding siblings ...)
  2021-05-25  8:05 ` [PATCH v2 4/4] migration/rdma: source: poll cm_event from return path Li Zhijian
@ 2021-05-26 11:30 ` Dr. David Alan Gilbert
  3 siblings, 0 replies; 7+ messages in thread
From: Dr. David Alan Gilbert @ 2021-05-26 11:30 UTC (permalink / raw)
  To: Li Zhijian; +Cc: qemu-devel, quintela

* Li Zhijian (lizhijian@cn.fujitsu.com) wrote:
> The error path after calling qemu_rdma_dest_init() should do RDMA cleanup.
> 
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

Queued

> ---
>  migration/rdma.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 41726cc74a8..7e7595faabf 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -4040,7 +4040,7 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp)
>  
>      if (ret) {
>          ERROR(errp, "listening on socket!");
> -        goto err;
> +        goto cleanup_rdma;
>      }
>  
>      trace_rdma_start_incoming_migration_after_rdma_listen();
> @@ -4050,7 +4050,7 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp)
>          rdma_return_path = qemu_rdma_data_init(host_port, &local_err);
>  
>          if (rdma_return_path == NULL) {
> -            goto err;
> +            goto cleanup_rdma;
>          }
>  
>          qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
> @@ -4059,6 +4059,9 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp)
>      qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
>                          NULL, (void *)(intptr_t)rdma);
>      return;
> +
> +cleanup_rdma:
> +    qemu_rdma_cleanup(rdma);
>  err:
>      error_propagate(errp, local_err);
>      if (rdma) {
> -- 
> 2.30.2
> 
> 
> 
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-05-26 11:32 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-25  8:05 [PATCH v2 1/4] migration/rdma: cleanup rmda in rdma_start_incoming_migration error path Li Zhijian
2021-05-25  8:05 ` [PATCH v2 2/4] migration/rdma: Fix rdma_addrinfo res leaks Li Zhijian
2021-05-25  8:05 ` [PATCH v2 3/4] migration/rdma: destination: create the return patch after the first accept Li Zhijian
2021-05-25  9:40   ` Dr. David Alan Gilbert
2021-05-25  8:05 ` [PATCH v2 4/4] migration/rdma: source: poll cm_event from return path Li Zhijian
2021-05-25 10:26   ` Dr. David Alan Gilbert
2021-05-26 11:30 ` [PATCH v2 1/4] migration/rdma: cleanup rmda in rdma_start_incoming_migration error path Dr. David Alan Gilbert

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.