All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nicholas Thomas <nick@bytemark.co.uk>
To: kwolf@redhat.com
Cc: Paolo Bonzini <pbonzini@redhat.com>, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 11/12] nbd: switch to asynchronous operation
Date: Fri, 09 Sep 2011 15:52:08 +0100	[thread overview]
Message-ID: <4E6A2818.7070209@bytemark.co.uk> (raw)
In-Reply-To: <1315495505-28906-12-git-send-email-pbonzini@redhat.com>

On 08/09/11 16:25, Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block/nbd.c |  167 ++++++++++++++++++++++++++++++++++++++--------------------
>  nbd.c       |    8 +++
>  2 files changed, 117 insertions(+), 58 deletions(-)
> 
> diff --git a/block/nbd.c b/block/nbd.c
> index 964caa8..5a75263 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -52,6 +52,9 @@ typedef struct BDRVNBDState {
>      size_t blocksize;
>      char *export_name; /* An NBD server may export several devices */
>  
> +    CoMutex mutex;
> +    Coroutine *coroutine;
> +
>      /* If it begins with  '/', this is a UNIX domain socket. Otherwise,
>       * it's a string of the form <hostname|ip4|\[ip6\]>:port
>       */
> @@ -104,6 +107,37 @@ out:
>      return err;
>  }
>  
> +static void nbd_coroutine_start(BDRVNBDState *s)
> +{
> +    qemu_co_mutex_lock(&s->mutex);
> +    s->coroutine = qemu_coroutine_self();
> +}
> +
> +static void nbd_coroutine_enter(void *opaque)
> +{
> +    BDRVNBDState *s = opaque;
> +    qemu_coroutine_enter(s->coroutine, NULL);
> +}
> +
> +static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request)
> +{
> +    qemu_aio_set_fd_handler(s->sock, NULL, nbd_coroutine_enter, NULL, NULL, s);
> +    return nbd_send_request(s->sock, request);
> +}
> +
> +static int nbd_co_receive_reply(BDRVNBDState *s, struct nbd_reply *reply)
> +{
> +    qemu_aio_set_fd_handler(s->sock, nbd_coroutine_enter, NULL, NULL, NULL, s);
> +    return nbd_receive_reply(s->sock, reply);
> +}
> +
> +static void nbd_coroutine_end(BDRVNBDState *s)
> +{
> +    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL, s);
> +    s->coroutine = NULL;
> +    qemu_co_mutex_unlock(&s->mutex);
> +}
> +
>  static int nbd_establish_connection(BlockDriverState *bs)
>  {
>      BDRVNBDState *s = bs->opaque;
> @@ -163,6 +197,8 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
>      BDRVNBDState *s = bs->opaque;
>      int result;
>  
> +    qemu_co_mutex_init(&s->mutex);
> +
>      /* Pop the config into our state object. Exit if invalid. */
>      result = nbd_config(s, filename, flags);
>      if (result != 0) {
> @@ -177,8 +213,8 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
>      return result;
>  }
>  
> -static int nbd_read(BlockDriverState *bs, int64_t sector_num,
> -                    uint8_t *buf, int nb_sectors)
> +static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
> +                        int nb_sectors, QEMUIOVector *qiov)
>  {
>      BDRVNBDState *s = bs->opaque;
>      struct nbd_request request;
> @@ -189,30 +225,39 @@ static int nbd_read(BlockDriverState *bs, int64_t sector_num,
>      request.from = sector_num * 512;;
>      request.len = nb_sectors * 512;
>  
> -    if (nbd_send_request(s->sock, &request) == -1)
> -        return -errno;
> -
> -    if (nbd_receive_reply(s->sock, &reply) == -1)
> -        return -errno;
> -
> -    if (reply.error !=0)
> -        return -reply.error;
> -
> -    if (reply.handle != request.handle)
> -        return -EIO;
> +    nbd_coroutine_start(s);
> +    if (nbd_co_send_request(s, &request) == -1) {
> +        reply.error = errno;
> +        goto done;
> +    }
> +    if (nbd_co_receive_reply(s, &reply) == -1) {
> +        reply.error = errno;
> +        goto done;
> +    }
> +    if (reply.error != 0) {
> +        goto done;
> +    }
> +    if (reply.handle != request.handle) {
> +        reply.error = EIO;
> +        goto done;
> +    }
> +    if (qemu_co_recvv(s->sock, qiov->iov, request.len, 0) != request.len) {
> +        reply.error = EIO;
> +    }
>  
> -    if (nbd_wr_sync(s->sock, buf, request.len, 1) != request.len)
> -        return -EIO;
> +done:
> +    nbd_coroutine_end(s);
> +    return -reply.error;
>  
> -    return 0;
>  }

I'm a bit unsure here, actually. So you lock a mutex, send a request,
wait for a response, then unlock the mutex. Surely this code doesn't
allow more than one request to be in flight at a time?

My approach was to write the request to the socket as soon as possible.
IIRC, QEMU can have up to 16 IOs outstanding, so this gave us "some"
speedup, although I don't have formal before/after benchmarks. For
testing, speed isn't too important, but we're using NBD in production to
run VMs on a 10GigE network with SAS and SSD storage, so we're somewhat
interested in performance :).

If this *is* letting > 1 request be on the wire at a time, then
request.handle needs to be unique to each in-flight request. I don't
think:

    request.handle = (uint64_t)(intptr_t)bs;

is unique — `bs` is the same BlockDriverState pointer for every request
on the device, so all outstanding requests would share one handle.


> -static int nbd_write(BlockDriverState *bs, int64_t sector_num,
> -                     const uint8_t *buf, int nb_sectors)
> +static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
> +                         int nb_sectors, QEMUIOVector *qiov)
>  {
>      BDRVNBDState *s = bs->opaque;
>      struct nbd_request request;
>      struct nbd_reply reply;
> +    int ret;
>  
>      request.type = NBD_CMD_WRITE;
>      if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
> @@ -223,25 +268,30 @@ static int nbd_write(BlockDriverState *bs, int64_t sector_num,
>      request.from = sector_num * 512;;
>      request.len = nb_sectors * 512;
>  
> -    if (nbd_send_request(s->sock, &request) == -1)
> -        return -errno;
> -
> -    if (nbd_wr_sync(s->sock, (uint8_t*)buf, request.len, 0) != request.len)
> -        return -EIO;
> -
> -    if (nbd_receive_reply(s->sock, &reply) == -1)
> -        return -errno;
> -
> -    if (reply.error !=0)
> -        return -reply.error;
> -
> -    if (reply.handle != request.handle)
> -        return -EIO;
> +    nbd_coroutine_start(s);
> +    if (nbd_co_send_request(s, &request) == -1) {
> +        reply.error = errno;
> +        goto done;
> +    }
> +    ret = qemu_co_sendv(s->sock, qiov->iov, request.len, 0);
> +    if (ret != request.len) {
> +        reply.error = EIO;
> +        goto done;
> +    }
> +    if (nbd_co_receive_reply(s, &reply) == -1) {
> +        reply.error = errno;
> +        goto done;
> +    }
> +    if (reply.handle != request.handle) {
> +        reply.error = EIO;
> +    }
>  
> -    return 0;
> +done:
> +    nbd_coroutine_end(s);
> +    return -reply.error;
>  }
>  
> -static int nbd_flush(BlockDriverState *bs)
> +static int nbd_co_flush(BlockDriverState *bs)
>  {
>      BDRVNBDState *s = bs->opaque;
>      struct nbd_request request;
> @@ -260,19 +310,22 @@ static int nbd_flush(BlockDriverState *bs)
>      request.from = 0;
>      request.len = 0;
>  
> -    if (nbd_send_request(s->sock, &request) == -1)
> -        return -errno;
> -
> -    if (nbd_receive_reply(s->sock, &reply) == -1)
> -        return -errno;
> -
> -    if (reply.error !=0)
> -        return -reply.error;
> -
> -    if (reply.handle != request.handle)
> -        return -EIO;
> +    nbd_coroutine_start(s);
> +    if (nbd_co_send_request(s, &request) == -1) {
> +        reply.error = errno;
> +        goto done;
> +    }
> +    if (nbd_co_receive_reply(s, &reply) == -1) {
> +        reply.error = errno;
> +        goto done;
> +    }
> +    if (reply.error == 0 && reply.handle != request.handle) {
> +        reply.error = EIO;
> +    }
>  
> -    return 0;
> +done:
> +    nbd_coroutine_end(s);
> +    return -reply.error;
>  }
>  
>  static int nbd_discard(BlockDriverState *bs, int64_t sector_num,
> @@ -290,19 +343,17 @@ static int nbd_discard(BlockDriverState *bs, int64_t sector_num,
>      request.from = sector_num * 512;;
>      request.len = nb_sectors * 512;
>  
> -    if (nbd_send_request(s->sock, &request) == -1)
> +    if (nbd_send_request(s->sock, &request) == -1) {
>          return -errno;
> -
> -    if (nbd_receive_reply(s->sock, &reply) == -1)
> +    }
> +    if (nbd_receive_reply(s->sock, &reply) == -1) {
>          return -errno;
> -
> -    if (reply.error !=0)
> -        return -reply.error;
> -
> -    if (reply.handle != request.handle)
> +    }
> +    if (reply.error == 0 && reply.handle != request.handle) {
>          return -EIO;
> +    }
>  
> -    return 0;
> +    return -reply.error;
>  }
>  
>  static void nbd_close(BlockDriverState *bs)
> @@ -325,10 +376,10 @@ static BlockDriver bdrv_nbd = {
>      .format_name	= "nbd",
>      .instance_size	= sizeof(BDRVNBDState),
>      .bdrv_file_open	= nbd_open,
> -    .bdrv_read		= nbd_read,
> -    .bdrv_write		= nbd_write,
> +    .bdrv_co_readv	= nbd_co_readv,
> +    .bdrv_co_writev	= nbd_co_writev,
>      .bdrv_close		= nbd_close,
> -    .bdrv_flush		= nbd_flush,
> +    .bdrv_co_flush	= nbd_co_flush,
>      .bdrv_discard	= nbd_discard,
>      .bdrv_getlength	= nbd_getlength,
>      .protocol_name	= "nbd",
> diff --git a/nbd.c b/nbd.c
> index f089904..2f4c6b3 100644
> --- a/nbd.c
> +++ b/nbd.c
> @@ -80,6 +80,14 @@ size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
>  {
>      size_t offset = 0;
>  
> +    if (qemu_in_coroutine()) {
> +        if (do_read) {
> +            return qemu_co_recv(fd, buffer, size);
> +        } else {
> +            return qemu_co_send(fd, buffer, size);
> +        }
> +    }
> +
>      while (offset < size) {
>          ssize_t len;
>  

  reply	other threads:[~2011-09-09 14:52 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
2011-09-08 15:24 ` [Qemu-devel] [PATCH 01/12] nbd: support feature negotiation Paolo Bonzini
2011-09-08 15:24 ` [Qemu-devel] [PATCH 02/12] nbd: sync API definitions with upstream Paolo Bonzini
2011-09-12 14:15   ` Kevin Wolf
2011-09-12 15:08     ` Paolo Bonzini
2011-09-08 15:24 ` [Qemu-devel] [PATCH 03/12] nbd: support NBD_SET_FLAGS ioctl Paolo Bonzini
2011-09-08 15:24 ` [Qemu-devel] [PATCH 04/12] nbd: add support for NBD_CMD_FLUSH Paolo Bonzini
2011-09-13 13:52   ` Kevin Wolf
2011-09-13 15:13     ` Paolo Bonzini
2011-09-08 15:24 ` [Qemu-devel] [PATCH 05/12] nbd: add support for NBD_CMD_FLAG_FUA Paolo Bonzini
2011-09-13 13:55   ` Kevin Wolf
2011-09-08 15:24 ` [Qemu-devel] [PATCH 06/12] nbd: support NBD_CMD_TRIM in the server Paolo Bonzini
2011-09-13 13:58   ` Kevin Wolf
2011-09-13 15:14     ` Paolo Bonzini
2011-09-14 15:44   ` Christoph Hellwig
2011-09-14 16:25     ` Paolo Bonzini
2011-09-08 15:25 ` [Qemu-devel] [PATCH 07/12] sheepdog: add coroutine_fn markers Paolo Bonzini
2011-09-08 15:25 ` [Qemu-devel] [PATCH 08/12] add socket_set_block Paolo Bonzini
2011-09-08 15:25 ` [Qemu-devel] [PATCH 09/12] sheepdog: move coroutine send/recv function to generic code Paolo Bonzini
2011-09-09  4:53   ` MORITA Kazutaka
2011-09-09  8:11     ` [Qemu-devel] [PATCH v2 " Paolo Bonzini
2011-09-13  0:28       ` MORITA Kazutaka
2011-09-13 14:14       ` Kevin Wolf
2011-09-13 15:16         ` Paolo Bonzini
2011-09-13 15:36           ` Kevin Wolf
2011-09-13 15:38             ` Paolo Bonzini
2011-09-08 15:25 ` [Qemu-devel] [PATCH 10/12] block: add bdrv_co_flush support Paolo Bonzini
2011-09-08 15:25 ` [Qemu-devel] [PATCH 11/12] nbd: switch to asynchronous operation Paolo Bonzini
2011-09-09 14:52   ` Nicholas Thomas [this message]
2011-09-09 15:03     ` Paolo Bonzini
2011-09-09 15:34       ` Paolo Bonzini
2011-09-08 15:25 ` [Qemu-devel] [PATCH 12/12] nbd: split requests Paolo Bonzini
2011-09-09 14:52   ` Nicholas Thomas
2011-09-09 15:33     ` Paolo Bonzini
2011-09-09  9:00 ` [Qemu-devel] [PATCH 00/12] nbd improvements Kevin Wolf
2011-09-09 10:29   ` Paolo Bonzini
2011-09-09 10:42     ` Kevin Wolf
2011-09-09 10:50     ` Nicholas Thomas
2011-09-09 11:00       ` Paolo Bonzini
2011-09-09 11:04       ` Kevin Wolf
2011-09-09 14:51         ` Nicholas Thomas
2011-09-14  9:50 ` Kevin Wolf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4E6A2818.7070209@bytemark.co.uk \
    --to=nick@bytemark.co.uk \
    --cc=kwolf@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.