From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: "Daniel P. Berrange" <berrange@redhat.com>
Cc: Amit Shah <amit.shah@redhat.com>,
	qemu-devel@nongnu.org, Juan Quintela <quintela@redhat.com>
Subject: Re: [Qemu-devel] [PATCH v1 13/22] migration: convert RDMA to use QIOChannel interface
Date: Tue, 2 Feb 2016 20:01:36 +0000
Message-ID: <20160202200136.GF4498@work-vm>
In-Reply-To: <1452599056-27357-14-git-send-email-berrange@redhat.com>

* Daniel P. Berrange (berrange@redhat.com) wrote:
> This converts the RDMA code to provide a subclass of
> QIOChannel that uses RDMA for the data transport.
> 
> The RDMA code would be much better off if it could
> be split up into a generic RDMA layer, a QIOChannel
> impl based on RDMA, and then the RDMA migration
> glue. This is left as a future exercise for the brave.
> 
> Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
> ---
>  migration/rdma.c | 260 ++++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 161 insertions(+), 99 deletions(-)
> 
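
Just to make the suggested split concrete, a rough sketch of where the
seams could go (RDMAConn and the function names below are invented for
illustration; only QIOChannelRDMA matches what this patch actually adds):

    #include <stddef.h>
    #include <sys/types.h>

    /* 1. Generic RDMA transport layer: no migration knowledge at all. */
    typedef struct RDMAConn RDMAConn;
    RDMAConn *rdma_conn_connect(const char *host, int port);
    ssize_t rdma_conn_send(RDMAConn *c, const void *buf, size_t len);
    ssize_t rdma_conn_recv(RDMAConn *c, void *buf, size_t len);
    void rdma_conn_close(RDMAConn *c);

    /* 2. QIOChannel impl: wraps an RDMAConn behind the channel API. */
    typedef struct QIOChannelRDMA QIOChannelRDMA;
    QIOChannelRDMA *qio_channel_rdma_new(RDMAConn *conn);

    /*
     * 3. Migration glue: RAM block registration, the save_page hook and
     *    friends, built on top of the channel rather than reaching into
     *    RDMAContext directly.
     */
    int rdma_migration_register_blocks(QIOChannelRDMA *rioc);
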
> diff --git a/migration/rdma.c b/migration/rdma.c
> index bffbfaf..3e961cb 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -374,14 +374,19 @@ typedef struct RDMAContext {
>      GHashTable *blockmap;
>  } RDMAContext;
>  
> -/*
> - * Interface to the rest of the migration call stack.
> - */
> -typedef struct QEMUFileRDMA {
> +#define TYPE_QIO_CHANNEL_RDMA "qio-channel-rdma"
> +#define QIO_CHANNEL_RDMA(obj)                                     \
> +    OBJECT_CHECK(QIOChannelRDMA, (obj), TYPE_QIO_CHANNEL_RDMA)
> +
> +typedef struct QIOChannelRDMA QIOChannelRDMA;
> +
> +
> +struct QIOChannelRDMA {
> +    QIOChannel parent;
>      RDMAContext *rdma;
> +    QEMUFile *file;
>      size_t len;
> -    void *file;
> -} QEMUFileRDMA;
> +};
>  
>  /*
>   * Main structure for IB Send/Recv control messages.
> @@ -2518,15 +2523,19 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp)
>   * SEND messages for control only.
>   * VM's ram is handled with regular RDMA messages.
>   */
> -static ssize_t qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
> -                                    int64_t pos, size_t size)
> -{
> -    QEMUFileRDMA *r = opaque;
> -    QEMUFile *f = r->file;
> -    RDMAContext *rdma = r->rdma;
> -    size_t remaining = size;
> -    uint8_t * data = (void *) buf;
> +static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
> +                                       const struct iovec *iov,
> +                                       size_t niov,
> +                                       int *fds,
> +                                       size_t nfds,
> +                                       Error **errp)
> +{
> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> +    QEMUFile *f = rioc->file;
> +    RDMAContext *rdma = rioc->rdma;
>      int ret;
> +    ssize_t done = 0;
> +    size_t i;
>  
>      CHECK_ERROR_STATE();
>  
> @@ -2540,27 +2549,31 @@ static ssize_t qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
>          return ret;
>      }
>  
> -    while (remaining) {
> -        RDMAControlHeader head;
> +    for (i = 0; i < niov; i++) {
> +        size_t remaining = iov[i].iov_len;
> +        uint8_t * data = (void *)iov[i].iov_base;
> +        while (remaining) {
> +            RDMAControlHeader head;
>  
> -        r->len = MIN(remaining, RDMA_SEND_INCREMENT);
> -        remaining -= r->len;
> +            rioc->len = MIN(remaining, RDMA_SEND_INCREMENT);
> +            remaining -= rioc->len;
>  
> -        /* Guaranteed to fit due to RDMA_SEND_INCREMENT MIN above */
> -        head.len = (uint32_t)r->len;
> -        head.type = RDMA_CONTROL_QEMU_FILE;
> +            head.len = rioc->len;
> +            head.type = RDMA_CONTROL_QEMU_FILE;
>  
> -        ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL);
> +            ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL);
>  
> -        if (ret < 0) {
> -            rdma->error_state = ret;
> -            return ret;
> -        }
> +            if (ret < 0) {
> +                rdma->error_state = ret;
> +                return ret;
> +            }
>  
> -        data += r->len;
> +            data += rioc->len;
> +            done += rioc->len;
> +        }
>      }
>  
> -    return size;
> +    return done;
>  }
>  
>  static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t *buf,
> @@ -2585,41 +2598,65 @@ static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t *buf,
>   * RDMA links don't use bytestreams, so we have to
>   * return bytes to QEMUFile opportunistically.
>   */
> -static ssize_t qemu_rdma_get_buffer(void *opaque, uint8_t *buf,
> -                                    int64_t pos, size_t size)
> -{
> -    QEMUFileRDMA *r = opaque;
> -    RDMAContext *rdma = r->rdma;
> +static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
> +                                      const struct iovec *iov,
> +                                      size_t niov,
> +                                      int **fds,
> +                                      size_t *nfds,
> +                                      Error **errp)
> +{
> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> +    RDMAContext *rdma = rioc->rdma;
>      RDMAControlHeader head;
>      int ret = 0;
> +    ssize_t i;
> +    size_t done = 0;
>  
>      CHECK_ERROR_STATE();
>  
> -    /*
> -     * First, we hold on to the last SEND message we
> -     * were given and dish out the bytes until we run
> -     * out of bytes.
> -     */
> -    r->len = qemu_rdma_fill(r->rdma, buf, size, 0);
> -    if (r->len) {
> -        return r->len;
> -    }
> +    for (i = 0; i < niov; i++) {
> +        size_t want = iov[i].iov_len;
> +        uint8_t *data = (void *)iov[i].iov_base;
>  
> -    /*
> -     * Once we run out, we block and wait for another
> -     * SEND message to arrive.
> -     */
> -    ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_QEMU_FILE);
> +        /*
> +         * First, we hold on to the last SEND message we
> +         * were given and dish out the bytes until we run
> +         * out of bytes.
> +         */
> +        ret = qemu_rdma_fill(rioc->rdma, data, want, 0);
> +        if (ret > 0) {
> +            done += ret;
> +            if (ret < want) {
> +                break;
> +            } else {
> +                continue;
> +            }

> +        }
>  
> -    if (ret < 0) {
> -        rdma->error_state = ret;
> -        return ret;
> -    }
> +        /*
> +         * Once we run out, we block and wait for another
> +         * SEND message to arrive.
> +         */
> +        ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_QEMU_FILE);
>  
> -    /*
> -     * SEND was received with new bytes, now try again.
> -     */
> -    return qemu_rdma_fill(r->rdma, buf, size, 0);
> +        if (ret < 0) {
> +            rdma->error_state = ret;
> +            return ret;
> +        }
> +
> +        /*
> +         * SEND was received with new bytes, now try again.
> +         */
> +        ret = qemu_rdma_fill(rioc->rdma, data, want, 0);
> +        if (ret > 0) {
> +            done += ret;
> +            if (ret < want) {
> +                break;
> +            }
> +        }

I don't quite understand the behaviour of this loop.
If rdma_fill returns less than you wanted for the first iov we break.
If it returns 0 then we try to get some more.
The weird thing to me is the case where we have two iov entries: if
the amount returned by qemu_rdma_fill happens to exactly match the
size of the 1st iov, then I think we end up doing the exchange_recv
and waiting for more, even though we already have bytes we could
return.  Is that what we want? Why?
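
To make that scenario concrete, here is a tiny standalone model of the
control flow (the stub, the buffer contents and the 512 byte sizes are
all made up for illustration; this is not the real code):

    #include <stdio.h>
    #include <string.h>
    #include <sys/uio.h>

    /* Pretend the last SEND left exactly 512 bytes buffered. */
    static size_t buffered = 512;

    /* Stand-in for qemu_rdma_fill(): hand out up to 'want' buffered bytes. */
    static size_t stub_fill(void *buf, size_t want)
    {
        size_t n = buffered < want ? buffered : want;
        memset(buf, 0, n);          /* stand-in for copying real data */
        buffered -= n;
        return n;
    }

    int main(void)
    {
        char a[512], b[512];
        struct iovec iov[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
        size_t done = 0;

        for (size_t i = 0; i < 2; i++) {
            size_t want = iov[i].iov_len;
            size_t got = stub_fill(iov[i].iov_base, want);
            if (got > 0) {
                done += got;
                if (got < want) {
                    break;          /* partial fill: return what we have */
                }
                continue;           /* exact fill: move on to the next iov */
            }
            /*
             * got == 0: this is the point where the patch calls
             * qemu_rdma_exchange_recv() and blocks, even though 'done'
             * is already 512 and could be returned to the caller.
             */
            printf("would block with done=%zu bytes pending\n", done);
            return 0;
        }
        printf("returned %zu bytes without blocking\n", done);
        return 0;
    }

Run as written, that prints "would block with done=512 bytes pending",
which is exactly the two-iov case described above.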

Dave

> +    }
> +    rioc->len = done;
> +    return rioc->len;
>  }
>  
>  /*
> @@ -2646,15 +2683,16 @@ static int qemu_rdma_drain_cq(QEMUFile *f, RDMAContext *rdma)
>      return 0;
>  }
>  
> -static int qemu_rdma_close(void *opaque)
> +static int qio_channel_rdma_close(QIOChannel *ioc,
> +                                  Error **errp)
>  {
> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>      trace_qemu_rdma_close();
> -    QEMUFileRDMA *r = opaque;
> -    if (r->rdma) {
> -        qemu_rdma_cleanup(r->rdma);
> -        g_free(r->rdma);
> +    if (rioc->rdma) {
> +        qemu_rdma_cleanup(rioc->rdma);
> +        g_free(rioc->rdma);
> +        rioc->rdma = NULL;
>      }
> -    g_free(r);
>      return 0;
>  }
>  
> @@ -2696,8 +2734,8 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
>                                    ram_addr_t block_offset, ram_addr_t offset,
>                                    size_t size, uint64_t *bytes_sent)
>  {
> -    QEMUFileRDMA *rfile = opaque;
> -    RDMAContext *rdma = rfile->rdma;
> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
> +    RDMAContext *rdma = rioc->rdma;
>      int ret;
>  
>      CHECK_ERROR_STATE();
> @@ -2951,8 +2989,8 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque)
>                               };
>      RDMAControlHeader blocks = { .type = RDMA_CONTROL_RAM_BLOCKS_RESULT,
>                                   .repeat = 1 };
> -    QEMUFileRDMA *rfile = opaque;
> -    RDMAContext *rdma = rfile->rdma;
> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
> +    RDMAContext *rdma = rioc->rdma;
>      RDMALocalBlocks *local = &rdma->local_ram_blocks;
>      RDMAControlHeader head;
>      RDMARegister *reg, *registers;
> @@ -3207,9 +3245,10 @@ out:
>   * We've already built our local RAMBlock list, but not yet sent the list to
>   * the source.
>   */
> -static int rdma_block_notification_handle(QEMUFileRDMA *rfile, const char *name)
> +static int
> +rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name)
>  {
> -    RDMAContext *rdma = rfile->rdma;
> +    RDMAContext *rdma = rioc->rdma;
>      int curr;
>      int found = -1;
>  
> @@ -3251,8 +3290,8 @@ static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data)
>  static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
>                                          uint64_t flags, void *data)
>  {
> -    QEMUFileRDMA *rfile = opaque;
> -    RDMAContext *rdma = rfile->rdma;
> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
> +    RDMAContext *rdma = rioc->rdma;
>  
>      CHECK_ERROR_STATE();
>  
> @@ -3271,8 +3310,8 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
>                                         uint64_t flags, void *data)
>  {
>      Error *local_err = NULL, **errp = &local_err;
> -    QEMUFileRDMA *rfile = opaque;
> -    RDMAContext *rdma = rfile->rdma;
> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
> +    RDMAContext *rdma = rioc->rdma;
>      RDMAControlHeader head = { .len = 0, .repeat = 1 };
>      int ret = 0;
>  
> @@ -3368,55 +3407,78 @@ err:
>      return ret;
>  }
>  
> -static int qemu_rdma_get_fd(void *opaque)
> -{
> -    QEMUFileRDMA *rfile = opaque;
> -    RDMAContext *rdma = rfile->rdma;
> -
> -    return rdma->comp_channel->fd;
> -}
> -
> -static const QEMUFileOps rdma_read_ops = {
> -    .get_buffer    = qemu_rdma_get_buffer,
> -    .get_fd        = qemu_rdma_get_fd,
> -    .close         = qemu_rdma_close,
> -};
> -
>  static const QEMUFileHooks rdma_read_hooks = {
>      .hook_ram_load = rdma_load_hook,
>  };
>  
> -static const QEMUFileOps rdma_write_ops = {
> -    .put_buffer         = qemu_rdma_put_buffer,
> -    .close              = qemu_rdma_close,
> -};
> -
>  static const QEMUFileHooks rdma_write_hooks = {
>      .before_ram_iterate = qemu_rdma_registration_start,
>      .after_ram_iterate  = qemu_rdma_registration_stop,
>      .save_page          = qemu_rdma_save_page,
>  };
>  
> -static void *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
> +
> +static void qio_channel_rdma_finalize(Object *obj)
> +{
> +    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(obj);
> +    if (rioc->rdma) {
> +        qemu_rdma_cleanup(rioc->rdma);
> +        g_free(rioc->rdma);
> +        rioc->rdma = NULL;
> +    }
> +}
> +
> +static void qio_channel_rdma_class_init(ObjectClass *klass,
> +                                        void *class_data G_GNUC_UNUSED)
> +{
> +    QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
> +
> +    ioc_klass->io_writev = qio_channel_rdma_writev;
> +    ioc_klass->io_readv = qio_channel_rdma_readv;
> +    /* XXX
> +    ioc_klass->io_set_blocking = qio_channel_rdma_set_blocking;
> +    */
> +    ioc_klass->io_close = qio_channel_rdma_close;
> +    /* XXX
> +    ioc_klass->io_create_watch = qio_channel_rdma_create_watch;
> +    */
> +}
> +
> +static const TypeInfo qio_channel_rdma_info = {
> +    .parent = TYPE_QIO_CHANNEL,
> +    .name = TYPE_QIO_CHANNEL_RDMA,
> +    .instance_size = sizeof(QIOChannelRDMA),
> +    .instance_finalize = qio_channel_rdma_finalize,
> +    .class_init = qio_channel_rdma_class_init,
> +};
> +
> +static void qio_channel_rdma_register_types(void)
> +{
> +    type_register_static(&qio_channel_rdma_info);
> +}
> +
> +type_init(qio_channel_rdma_register_types);
> +
> +static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
>  {
> -    QEMUFileRDMA *r;
> +    QIOChannelRDMA *rioc;
>  
>      if (qemu_file_mode_is_not_valid(mode)) {
>          return NULL;
>      }
>  
> -    r = g_new0(QEMUFileRDMA, 1);
> -    r->rdma = rdma;
> +    rioc = QIO_CHANNEL_RDMA(object_new(TYPE_QIO_CHANNEL_RDMA));
> +    rioc->rdma = rdma;
>  
>      if (mode[0] == 'w') {
> -        r->file = qemu_fopen_ops(r, &rdma_write_ops);
> -        qemu_file_set_hooks(r->file, &rdma_write_hooks);
> +        rioc->file = qemu_fopen_channel_output(QIO_CHANNEL(rioc));
> +        qemu_file_set_hooks(rioc->file, &rdma_write_hooks);
>      } else {
> -        r->file = qemu_fopen_ops(r, &rdma_read_ops);
> -        qemu_file_set_hooks(r->file, &rdma_read_hooks);
> +        rioc->file = qemu_fopen_channel_input(QIO_CHANNEL(rioc));
> +        qemu_file_set_hooks(rioc->file, &rdma_read_hooks);
>      }
>  
> -    return r->file;
> +    return rioc->file;
>  }
>  
>  static void rdma_accept_incoming_migration(void *opaque)
> -- 
> 2.5.0
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Thread overview: 66+ messages
2016-01-12 11:43 [Qemu-devel] [PATCH v1 00/22] Convert migration to QIOChannel & support TLS Daniel P. Berrange
2016-01-12 11:43 ` [Qemu-devel] [PATCH v1 01/22] s390: use FILE instead of QEMUFile for creating text file Daniel P. Berrange
2016-01-12 11:58   ` Cornelia Huck
2016-01-12 12:01     ` Daniel P. Berrange
2016-01-12 12:05       ` Cornelia Huck
2016-02-12 17:19   ` Dr. David Alan Gilbert
2016-01-12 11:43 ` [Qemu-devel] [PATCH v1 02/22] migration: remove use of qemu_bufopen from vmstate tests Daniel P. Berrange
2016-01-28 17:45   ` Dr. David Alan Gilbert
2016-01-12 11:43 ` [Qemu-devel] [PATCH v1 03/22] migration: ensure qemu_fflush() always writes full data amount Daniel P. Berrange
2016-01-28 17:53   ` Dr. David Alan Gilbert
2016-02-03 13:31     ` Daniel P. Berrange
2016-01-12 11:43 ` [Qemu-devel] [PATCH v1 04/22] migration: split migration hooks out of QEMUFileOps Daniel P. Berrange
2016-01-28 17:57   ` Dr. David Alan Gilbert
2016-01-12 11:43 ` [Qemu-devel] [PATCH v1 05/22] migration: introduce set_blocking function in QEMUFileOps Daniel P. Berrange
2016-01-28 18:00   ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 06/22] migration: force QEMUFile to blocking mode for outgoing migration Daniel P. Berrange
2016-01-28 18:17   ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 07/22] migration: introduce a new QEMUFile impl based on QIOChannel Daniel P. Berrange
2016-02-02 17:06   ` Dr. David Alan Gilbert
2016-02-03 13:37     ` Daniel P. Berrange
2016-02-12 17:16       ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 08/22] migration: convert post-copy to use QIOChannelBuffer Daniel P. Berrange
2016-01-25 19:38   ` Dr. David Alan Gilbert
2016-01-25 22:15     ` Daniel P. Berrange
2016-01-26 18:59       ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 09/22] migration: convert unix socket protocol to use QIOChannel Daniel P. Berrange
2016-02-02 18:02   ` Dr. David Alan Gilbert
2016-02-03 11:25     ` Daniel P. Berrange
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 10/22] migration: convert tcp " Daniel P. Berrange
2016-02-02 18:19   ` Dr. David Alan Gilbert
2016-02-03 10:02     ` Daniel P. Berrange
2016-02-03 10:33       ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 11/22] migration: convert fd " Daniel P. Berrange
2016-02-02 18:46   ` Dr. David Alan Gilbert
2016-02-03 10:05     ` Daniel P. Berrange
2016-02-03 10:29       ` Dr. David Alan Gilbert
2016-02-03 10:39         ` Daniel P. Berrange
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 12/22] migration: convert exec " Daniel P. Berrange
2016-02-02 18:53   ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 13/22] migration: convert RDMA to use QIOChannel interface Daniel P. Berrange
2016-02-02 20:01   ` Dr. David Alan Gilbert [this message]
2016-02-03 11:37     ` Daniel P. Berrange
2016-02-03 13:23       ` Dr. David Alan Gilbert
2016-02-03 13:25         ` Daniel P. Berrange
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 14/22] migration: convert savevm to use QIOChannel for writing to files Daniel P. Berrange
2016-02-03  9:52   ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 15/22] migration: delete QEMUFile buffer implementation Daniel P. Berrange
2016-02-03  9:54   ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 16/22] migration: delete QEMUSizedBuffer struct Daniel P. Berrange
2016-02-03  9:55   ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 17/22] migration: delete QEMUFile sockets implementation Daniel P. Berrange
2016-02-03  9:56   ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 18/22] migration: delete QEMUFile stdio implementation Daniel P. Berrange
2016-02-03  9:58   ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 19/22] migration: move definition of struct QEMUFile back into qemu-file.c Daniel P. Berrange
2016-02-05 18:32   ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 20/22] migration: support TLS encryption with TCP migration backend Daniel P. Berrange
2016-02-12 17:09   ` Dr. David Alan Gilbert
2016-02-12 17:25     ` Daniel P. Berrange
2016-02-15 11:00       ` Daniel P. Berrange
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 21/22] migration: remove support for non-iovec based write handlers Daniel P. Berrange
2016-02-12 15:48   ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 22/22] migration: remove qemu_get_fd method from QEMUFile Daniel P. Berrange
2016-02-12 15:51   ` Dr. David Alan Gilbert
2016-01-12 11:59 ` [Qemu-devel] [PATCH v1 00/22] Convert migration to QIOChannel & support TLS Daniel P. Berrange
2016-01-20 18:01 ` Daniel P. Berrange
