From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: "Daniel P. Berrange" <berrange@redhat.com>
Cc: Amit Shah <amit.shah@redhat.com>,
qemu-devel@nongnu.org, Juan Quintela <quintela@redhat.com>
Subject: Re: [Qemu-devel] [PATCH v1 13/22] migration: convert RDMA to use QIOChannel interface
Date: Tue, 2 Feb 2016 20:01:36 +0000 [thread overview]
Message-ID: <20160202200136.GF4498@work-vm> (raw)
In-Reply-To: <1452599056-27357-14-git-send-email-berrange@redhat.com>
* Daniel P. Berrange (berrange@redhat.com) wrote:
> This converts the RDMA code to provide a subclass of
> QIOChannel that uses RDMA for the data transport.
>
> The RDMA code would be much better off it it could
> be split up in a generic RDMA layer, a QIOChannel
> impl based on RMDA, and then the RMDA migration
> glue. This is left as a future exercise for the brave.
>
> Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
> ---
> migration/rdma.c | 260 ++++++++++++++++++++++++++++++++++---------------------
> 1 file changed, 161 insertions(+), 99 deletions(-)
>
> diff --git a/migration/rdma.c b/migration/rdma.c
> index bffbfaf..3e961cb 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -374,14 +374,19 @@ typedef struct RDMAContext {
> GHashTable *blockmap;
> } RDMAContext;
>
> -/*
> - * Interface to the rest of the migration call stack.
> - */
> -typedef struct QEMUFileRDMA {
> +#define TYPE_QIO_CHANNEL_RDMA "qio-channel-rdma"
> +#define QIO_CHANNEL_RDMA(obj) \
> + OBJECT_CHECK(QIOChannelRDMA, (obj), TYPE_QIO_CHANNEL_RDMA)
> +
> +typedef struct QIOChannelRDMA QIOChannelRDMA;
> +
> +
> +struct QIOChannelRDMA {
> + QIOChannel parent;
> RDMAContext *rdma;
> + QEMUFile *file;
> size_t len;
> - void *file;
> -} QEMUFileRDMA;
> +};
>
> /*
> * Main structure for IB Send/Recv control messages.
> @@ -2518,15 +2523,19 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp)
> * SEND messages for control only.
> * VM's ram is handled with regular RDMA messages.
> */
> -static ssize_t qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
> - int64_t pos, size_t size)
> -{
> - QEMUFileRDMA *r = opaque;
> - QEMUFile *f = r->file;
> - RDMAContext *rdma = r->rdma;
> - size_t remaining = size;
> - uint8_t * data = (void *) buf;
> +static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
> + const struct iovec *iov,
> + size_t niov,
> + int *fds,
> + size_t nfds,
> + Error **errp)
> +{
> + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> + QEMUFile *f = rioc->file;
> + RDMAContext *rdma = rioc->rdma;
> int ret;
> + ssize_t done = 0;
> + size_t i;
>
> CHECK_ERROR_STATE();
>
> @@ -2540,27 +2549,31 @@ static ssize_t qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
> return ret;
> }
>
> - while (remaining) {
> - RDMAControlHeader head;
> + for (i = 0; i < niov; i++) {
> + size_t remaining = iov[i].iov_len;
> + uint8_t * data = (void *)iov[i].iov_base;
> + while (remaining) {
> + RDMAControlHeader head;
>
> - r->len = MIN(remaining, RDMA_SEND_INCREMENT);
> - remaining -= r->len;
> + rioc->len = MIN(remaining, RDMA_SEND_INCREMENT);
> + remaining -= rioc->len;
>
> - /* Guaranteed to fit due to RDMA_SEND_INCREMENT MIN above */
> - head.len = (uint32_t)r->len;
> - head.type = RDMA_CONTROL_QEMU_FILE;
> + head.len = rioc->len;
> + head.type = RDMA_CONTROL_QEMU_FILE;
>
> - ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL);
> + ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL);
>
> - if (ret < 0) {
> - rdma->error_state = ret;
> - return ret;
> - }
> + if (ret < 0) {
> + rdma->error_state = ret;
> + return ret;
> + }
>
> - data += r->len;
> + data += rioc->len;
> + done += rioc->len;
> + }
> }
>
> - return size;
> + return done;
> }
>
> static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t *buf,
> @@ -2585,41 +2598,65 @@ static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t *buf,
> * RDMA links don't use bytestreams, so we have to
> * return bytes to QEMUFile opportunistically.
> */
> -static ssize_t qemu_rdma_get_buffer(void *opaque, uint8_t *buf,
> - int64_t pos, size_t size)
> -{
> - QEMUFileRDMA *r = opaque;
> - RDMAContext *rdma = r->rdma;
> +static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
> + const struct iovec *iov,
> + size_t niov,
> + int **fds,
> + size_t *nfds,
> + Error **errp)
> +{
> + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> + RDMAContext *rdma = rioc->rdma;
> RDMAControlHeader head;
> int ret = 0;
> + ssize_t i;
> + size_t done = 0;
>
> CHECK_ERROR_STATE();
>
> - /*
> - * First, we hold on to the last SEND message we
> - * were given and dish out the bytes until we run
> - * out of bytes.
> - */
> - r->len = qemu_rdma_fill(r->rdma, buf, size, 0);
> - if (r->len) {
> - return r->len;
> - }
> + for (i = 0; i < niov; i++) {
> + size_t want = iov[i].iov_len;
> + uint8_t *data = (void *)iov[i].iov_base;
>
> - /*
> - * Once we run out, we block and wait for another
> - * SEND message to arrive.
> - */
> - ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_QEMU_FILE);
> + /*
> + * First, we hold on to the last SEND message we
> + * were given and dish out the bytes until we run
> + * out of bytes.
> + */
> + ret = qemu_rdma_fill(rioc->rdma, data, want, 0);
> + if (ret > 0) {
> + done += ret;
> + if (ret < want) {
> + break;
> + } else {
> + continue;
> + }
> + }
>
> - if (ret < 0) {
> - rdma->error_state = ret;
> - return ret;
> - }
> + /*
> + * Once we run out, we block and wait for another
> + * SEND message to arrive.
> + */
> + ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_QEMU_FILE);
>
> - /*
> - * SEND was received with new bytes, now try again.
> - */
> - return qemu_rdma_fill(r->rdma, buf, size, 0);
> + if (ret < 0) {
> + rdma->error_state = ret;
> + return ret;
> + }
> +
> + /*
> + * SEND was received with new bytes, now try again.
> + */
> + ret = qemu_rdma_fill(rioc->rdma, data, want, 0);
> + if (ret > 0) {
> + done += ret;
> + if (ret < want) {
> + break;
> + }
> + }
I don't quite understand the behaviour of this loop.
If rdma_fill returns less than you wanted for the first iov we break.
If it returns 0 then we try and get some more.
The weird thing to me is if we have two iov entries; if the
amount returned by the qemu_rdma_fill happens to match the size of
the 1st iov then I think we end up doing the exchange_recv and
waiting for more. Is that what we want? Why?
Dave
> + }
> + rioc->len = done;
> + return rioc->len;
> }
>
> /*
> @@ -2646,15 +2683,16 @@ static int qemu_rdma_drain_cq(QEMUFile *f, RDMAContext *rdma)
> return 0;
> }
>
> -static int qemu_rdma_close(void *opaque)
> +static int qio_channel_rdma_close(QIOChannel *ioc,
> + Error **errp)
> {
> + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> trace_qemu_rdma_close();
> - QEMUFileRDMA *r = opaque;
> - if (r->rdma) {
> - qemu_rdma_cleanup(r->rdma);
> - g_free(r->rdma);
> + if (rioc->rdma) {
> + qemu_rdma_cleanup(rioc->rdma);
> + g_free(rioc->rdma);
> + rioc->rdma = NULL;
> }
> - g_free(r);
> return 0;
> }
>
> @@ -2696,8 +2734,8 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
> ram_addr_t block_offset, ram_addr_t offset,
> size_t size, uint64_t *bytes_sent)
> {
> - QEMUFileRDMA *rfile = opaque;
> - RDMAContext *rdma = rfile->rdma;
> + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
> + RDMAContext *rdma = rioc->rdma;
> int ret;
>
> CHECK_ERROR_STATE();
> @@ -2951,8 +2989,8 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque)
> };
> RDMAControlHeader blocks = { .type = RDMA_CONTROL_RAM_BLOCKS_RESULT,
> .repeat = 1 };
> - QEMUFileRDMA *rfile = opaque;
> - RDMAContext *rdma = rfile->rdma;
> + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
> + RDMAContext *rdma = rioc->rdma;
> RDMALocalBlocks *local = &rdma->local_ram_blocks;
> RDMAControlHeader head;
> RDMARegister *reg, *registers;
> @@ -3207,9 +3245,10 @@ out:
> * We've already built our local RAMBlock list, but not yet sent the list to
> * the source.
> */
> -static int rdma_block_notification_handle(QEMUFileRDMA *rfile, const char *name)
> +static int
> +rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name)
> {
> - RDMAContext *rdma = rfile->rdma;
> + RDMAContext *rdma = rioc->rdma;
> int curr;
> int found = -1;
>
> @@ -3251,8 +3290,8 @@ static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data)
> static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
> uint64_t flags, void *data)
> {
> - QEMUFileRDMA *rfile = opaque;
> - RDMAContext *rdma = rfile->rdma;
> + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
> + RDMAContext *rdma = rioc->rdma;
>
> CHECK_ERROR_STATE();
>
> @@ -3271,8 +3310,8 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
> uint64_t flags, void *data)
> {
> Error *local_err = NULL, **errp = &local_err;
> - QEMUFileRDMA *rfile = opaque;
> - RDMAContext *rdma = rfile->rdma;
> + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
> + RDMAContext *rdma = rioc->rdma;
> RDMAControlHeader head = { .len = 0, .repeat = 1 };
> int ret = 0;
>
> @@ -3368,55 +3407,78 @@ err:
> return ret;
> }
>
> -static int qemu_rdma_get_fd(void *opaque)
> -{
> - QEMUFileRDMA *rfile = opaque;
> - RDMAContext *rdma = rfile->rdma;
> -
> - return rdma->comp_channel->fd;
> -}
> -
> -static const QEMUFileOps rdma_read_ops = {
> - .get_buffer = qemu_rdma_get_buffer,
> - .get_fd = qemu_rdma_get_fd,
> - .close = qemu_rdma_close,
> -};
> -
> static const QEMUFileHooks rdma_read_hooks = {
> .hook_ram_load = rdma_load_hook,
> };
>
> -static const QEMUFileOps rdma_write_ops = {
> - .put_buffer = qemu_rdma_put_buffer,
> - .close = qemu_rdma_close,
> -};
> -
> static const QEMUFileHooks rdma_write_hooks = {
> .before_ram_iterate = qemu_rdma_registration_start,
> .after_ram_iterate = qemu_rdma_registration_stop,
> .save_page = qemu_rdma_save_page,
> };
>
> -static void *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
> +
> +static void qio_channel_rdma_finalize(Object *obj)
> +{
> + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(obj);
> + if (rioc->rdma) {
> + qemu_rdma_cleanup(rioc->rdma);
> + g_free(rioc->rdma);
> + rioc->rdma = NULL;
> + }
> +}
> +
> +static void qio_channel_rdma_class_init(ObjectClass *klass,
> + void *class_data G_GNUC_UNUSED)
> +{
> + QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
> +
> + ioc_klass->io_writev = qio_channel_rdma_writev;
> + ioc_klass->io_readv = qio_channel_rdma_readv;
> + /* XXX
> + ioc_klass->io_set_blocking = qio_channel_rdma_set_blocking;
> + */
> + ioc_klass->io_close = qio_channel_rdma_close;
> + /* XXX
> + ioc_klass->io_create_watch = qio_channel_rdma_create_watch;
> + */
> +}
> +
> +static const TypeInfo qio_channel_rdma_info = {
> + .parent = TYPE_QIO_CHANNEL,
> + .name = TYPE_QIO_CHANNEL_RDMA,
> + .instance_size = sizeof(QIOChannelRDMA),
> + .instance_finalize = qio_channel_rdma_finalize,
> + .class_init = qio_channel_rdma_class_init,
> +};
> +
> +static void qio_channel_rdma_register_types(void)
> +{
> + type_register_static(&qio_channel_rdma_info);
> +}
> +
> +type_init(qio_channel_rdma_register_types);
> +
> +static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
> {
> - QEMUFileRDMA *r;
> + QIOChannelRDMA *rioc;
>
> if (qemu_file_mode_is_not_valid(mode)) {
> return NULL;
> }
>
> - r = g_new0(QEMUFileRDMA, 1);
> - r->rdma = rdma;
> + rioc = QIO_CHANNEL_RDMA(object_new(TYPE_QIO_CHANNEL_RDMA));
> + rioc->rdma = rdma;
>
> if (mode[0] == 'w') {
> - r->file = qemu_fopen_ops(r, &rdma_write_ops);
> - qemu_file_set_hooks(r->file, &rdma_write_hooks);
> + rioc->file = qemu_fopen_channel_output(QIO_CHANNEL(rioc));
> + qemu_file_set_hooks(rioc->file, &rdma_write_hooks);
> } else {
> - r->file = qemu_fopen_ops(r, &rdma_read_ops);
> - qemu_file_set_hooks(r->file, &rdma_read_hooks);
> + rioc->file = qemu_fopen_channel_input(QIO_CHANNEL(rioc));
> + qemu_file_set_hooks(rioc->file, &rdma_read_hooks);
> }
>
> - return r->file;
> + return rioc->file;
> }
>
> static void rdma_accept_incoming_migration(void *opaque)
> --
> 2.5.0
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
next prev parent reply other threads:[~2016-02-02 20:01 UTC|newest]
Thread overview: 66+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-01-12 11:43 [Qemu-devel] [PATCH v1 00/22] Convert migration to QIOChannel & support TLS Daniel P. Berrange
2016-01-12 11:43 ` [Qemu-devel] [PATCH v1 01/22] s390: use FILE instead of QEMUFile for creating text file Daniel P. Berrange
2016-01-12 11:58 ` Cornelia Huck
2016-01-12 12:01 ` Daniel P. Berrange
2016-01-12 12:05 ` Cornelia Huck
2016-02-12 17:19 ` Dr. David Alan Gilbert
2016-01-12 11:43 ` [Qemu-devel] [PATCH v1 02/22] migration: remove use of qemu_bufopen from vmstate tests Daniel P. Berrange
2016-01-28 17:45 ` Dr. David Alan Gilbert
2016-01-12 11:43 ` [Qemu-devel] [PATCH v1 03/22] migration: ensure qemu_fflush() always writes full data amount Daniel P. Berrange
2016-01-28 17:53 ` Dr. David Alan Gilbert
2016-02-03 13:31 ` Daniel P. Berrange
2016-01-12 11:43 ` [Qemu-devel] [PATCH v1 04/22] migration: split migration hooks out of QEMUFileOps Daniel P. Berrange
2016-01-28 17:57 ` Dr. David Alan Gilbert
2016-01-12 11:43 ` [Qemu-devel] [PATCH v1 05/22] migration: introduce set_blocking function in QEMUFileOps Daniel P. Berrange
2016-01-28 18:00 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 06/22] migration: force QEMUFile to blocking mode for outgoing migration Daniel P. Berrange
2016-01-28 18:17 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 07/22] migration: introduce a new QEMUFile impl based on QIOChannel Daniel P. Berrange
2016-02-02 17:06 ` Dr. David Alan Gilbert
2016-02-03 13:37 ` Daniel P. Berrange
2016-02-12 17:16 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 08/22] migration: convert post-copy to use QIOChannelBuffer Daniel P. Berrange
2016-01-25 19:38 ` Dr. David Alan Gilbert
2016-01-25 22:15 ` Daniel P. Berrange
2016-01-26 18:59 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 09/22] migration: convert unix socket protocol to use QIOChannel Daniel P. Berrange
2016-02-02 18:02 ` Dr. David Alan Gilbert
2016-02-03 11:25 ` Daniel P. Berrange
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 10/22] migration: convert tcp " Daniel P. Berrange
2016-02-02 18:19 ` Dr. David Alan Gilbert
2016-02-03 10:02 ` Daniel P. Berrange
2016-02-03 10:33 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 11/22] migration: convert fd " Daniel P. Berrange
2016-02-02 18:46 ` Dr. David Alan Gilbert
2016-02-03 10:05 ` Daniel P. Berrange
2016-02-03 10:29 ` Dr. David Alan Gilbert
2016-02-03 10:39 ` Daniel P. Berrange
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 12/22] migration: convert exec " Daniel P. Berrange
2016-02-02 18:53 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 13/22] migration: convert RDMA to use QIOChannel interface Daniel P. Berrange
2016-02-02 20:01 ` Dr. David Alan Gilbert [this message]
2016-02-03 11:37 ` Daniel P. Berrange
2016-02-03 13:23 ` Dr. David Alan Gilbert
2016-02-03 13:25 ` Daniel P. Berrange
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 14/22] migration: convert savevm to use QIOChannel for writing to files Daniel P. Berrange
2016-02-03 9:52 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 15/22] migration: delete QEMUFile buffer implementation Daniel P. Berrange
2016-02-03 9:54 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 16/22] migration: delete QEMUSizedBuffer struct Daniel P. Berrange
2016-02-03 9:55 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 17/22] migration: delete QEMUFile sockets implementation Daniel P. Berrange
2016-02-03 9:56 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 18/22] migration: delete QEMUFile stdio implementation Daniel P. Berrange
2016-02-03 9:58 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 19/22] migration: move definition of struct QEMUFile back into qemu-file.c Daniel P. Berrange
2016-02-05 18:32 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 20/22] migration: support TLS encryption with TCP migration backend Daniel P. Berrange
2016-02-12 17:09 ` Dr. David Alan Gilbert
2016-02-12 17:25 ` Daniel P. Berrange
2016-02-15 11:00 ` Daniel P. Berrange
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 21/22] migration: remove support for non-iovec based write handlers Daniel P. Berrange
2016-02-12 15:48 ` Dr. David Alan Gilbert
2016-01-12 11:44 ` [Qemu-devel] [PATCH v1 22/22] migration: remove qemu_get_fd method from QEMUFile Daniel P. Berrange
2016-02-12 15:51 ` Dr. David Alan Gilbert
2016-01-12 11:59 ` [Qemu-devel] [PATCH v1 00/22] Convert migration to QIOChannel & support TLS Daniel P. Berrange
2016-01-20 18:01 ` Daniel P. Berrange
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160202200136.GF4498@work-vm \
--to=dgilbert@redhat.com \
--cc=amit.shah@redhat.com \
--cc=berrange@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=quintela@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).