All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 00/12] nbd improvements
@ 2011-09-08 15:24 Paolo Bonzini
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 01/12] nbd: support feature negotiation Paolo Bonzini
                   ` (13 more replies)
  0 siblings, 14 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:24 UTC (permalink / raw)
  To: qemu-devel

I find nbd quite useful to test migration, but it is limited:
it can only do synchronous operations, it is not safe because it
does not support flush, and it has no discard either.  qemu-nbd
is also limited to 1MB requests, and the nbd block driver does
not take this into account.

Luckily, flush/FUA support is being worked out by upstream,
and discard can also be added with the same framework (patches
1 to 6).

Asynchronous support is also very similar to what sheepdog is
already doing (patches 7 to 12).

Paolo Bonzini (12):
  nbd: support feature negotiation
  nbd: sync API definitions with upstream
  nbd: support NBD_SET_FLAGS ioctl
  nbd: add support for NBD_CMD_FLUSH
  nbd: add support for NBD_CMD_FLAG_FUA
  nbd: support NBD_CMD_TRIM in the server
  sheepdog: add coroutine_fn markers
  add socket_set_block
  sheepdog: move coroutine send/recv function to generic code
  block: add bdrv_co_flush support
  nbd: switch to asynchronous operation
  nbd: split requests

 block.c          |   53 ++++++++++---
 block/nbd.c      |  225 ++++++++++++++++++++++++++++++++++++++++++++--------
 block/sheepdog.c |  235 +++++++-----------------------------------------------
 block_int.h      |    1 +
 cutils.c         |  108 +++++++++++++++++++++++++
 nbd.c            |   80 +++++++++++++++++--
 nbd.h            |   20 ++++-
 oslib-posix.c    |    7 ++
 oslib-win32.c    |    6 ++
 qemu-common.h    |    3 +
 qemu-coroutine.c |   71 ++++++++++++++++
 qemu-coroutine.h |   26 ++++++
 qemu-nbd.c       |   13 ++--
 qemu_socket.h    |    1 +
 14 files changed, 580 insertions(+), 269 deletions(-)

-- 
1.7.6

^ permalink raw reply	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 01/12] nbd: support feature negotiation
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
@ 2011-09-08 15:24 ` Paolo Bonzini
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 02/12] nbd: sync API definitions with upstream Paolo Bonzini
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:24 UTC (permalink / raw)
  To: qemu-devel

nbd supports writing flags in bytes 24...27 of the header,
and uses that for the read-only flag.  Add support for it
in qemu-nbd.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/nbd.c |    4 ++--
 nbd.c       |   32 +++++++++++++++++++++++++-------
 nbd.h       |    9 ++++++---
 qemu-nbd.c  |   13 ++++++-------
 4 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 55cb2fd..ffc57a9 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -47,6 +47,7 @@
 
 typedef struct BDRVNBDState {
     int sock;
+    uint32_t nbdflags;
     off_t size;
     size_t blocksize;
     char *export_name; /* An NBD server may export several devices */
@@ -110,7 +111,6 @@ static int nbd_establish_connection(BlockDriverState *bs)
     int ret;
     off_t size;
     size_t blocksize;
-    uint32_t nbdflags;
 
     if (s->host_spec[0] == '/') {
         sock = unix_socket_outgoing(s->host_spec);
@@ -125,7 +125,7 @@ static int nbd_establish_connection(BlockDriverState *bs)
     }
 
     /* NBD handshake */
-    ret = nbd_receive_negotiate(sock, s->export_name, &nbdflags, &size,
+    ret = nbd_receive_negotiate(sock, s->export_name, &s->nbdflags, &size,
                                 &blocksize);
     if (ret == -1) {
         logout("Failed to negotiate with the NBD server\n");
diff --git a/nbd.c b/nbd.c
index e7a585d..07a8e53 100644
--- a/nbd.c
+++ b/nbd.c
@@ -29,6 +29,10 @@
 #include <ctype.h>
 #include <inttypes.h>
 
+#ifdef __linux__
+#include <linux/fs.h>
+#endif
+
 #include "qemu_socket.h"
 
 //#define DEBUG_NBD
@@ -171,7 +175,7 @@ int unix_socket_outgoing(const char *path)
                   Request (type == 2)
 */
 
-int nbd_negotiate(int csock, off_t size)
+int nbd_negotiate(int csock, off_t size, uint32_t flags)
 {
     char buf[8 + 8 + 8 + 128];
 
@@ -179,14 +183,16 @@ int nbd_negotiate(int csock, off_t size)
         [ 0 ..   7]   passwd   ("NBDMAGIC")
         [ 8 ..  15]   magic    (0x00420281861253)
         [16 ..  23]   size
-        [24 .. 151]   reserved (0)
+        [24 ..  27]   flags
+        [28 .. 151]   reserved (0)
      */
 
     TRACE("Beginning negotiation.");
     memcpy(buf, "NBDMAGIC", 8);
     cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
     cpu_to_be64w((uint64_t*)(buf + 16), size);
-    memset(buf + 24, 0, 128);
+    cpu_to_be32w((uint32_t*)(buf + 24), flags | NBD_FLAG_HAS_FLAGS);
+    memset(buf + 28, 0, 124);
 
     if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
         LOG("write failed");
@@ -336,8 +342,8 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
         return 0;
 }
 
-#ifndef _WIN32
-int nbd_init(int fd, int csock, off_t size, size_t blocksize)
+#ifdef __linux__
+int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
 {
     TRACE("Setting block size to %lu", (unsigned long)blocksize);
 
@@ -357,6 +363,18 @@ int nbd_init(int fd, int csock, off_t size, size_t blocksize)
         return -1;
     }
 
+    if (flags & NBD_FLAG_READ_ONLY) {
+        int read_only = 1;
+        TRACE("Setting readonly attribute");
+
+        if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
+            int serrno = errno;
+            LOG("Failed setting read-only attribute");
+            errno = serrno;
+            return -1;
+        }
+    }
+
     TRACE("Clearing NBD socket");
 
     if (ioctl(fd, NBD_CLEAR_SOCK) == -1) {
@@ -547,7 +565,7 @@ static int nbd_send_reply(int csock, struct nbd_reply *reply)
 }
 
 int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
-             off_t *offset, bool readonly, uint8_t *data, int data_size)
+             off_t *offset, uint32_t nbdflags, uint8_t *data, int data_size)
 {
     struct nbd_request request;
     struct nbd_reply reply;
@@ -631,7 +649,7 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
             return -1;
         }
 
-        if (readonly) {
+        if (nbdflags & NBD_FLAG_READ_ONLY) {
             TRACE("Server is read-only, return error");
             reply.error = 1;
         } else {
diff --git a/nbd.h b/nbd.h
index 96f77fe..938a021 100644
--- a/nbd.h
+++ b/nbd.h
@@ -39,6 +39,9 @@ struct nbd_reply {
     uint64_t handle;
 } QEMU_PACKED;
 
+#define NBD_FLAG_HAS_FLAGS      (1 << 0)        /* Flags are there */
+#define NBD_FLAG_READ_ONLY      (1 << 1)        /* Device is read-only */
+
 enum {
     NBD_CMD_READ = 0,
     NBD_CMD_WRITE = 1,
@@ -55,14 +58,14 @@ int tcp_socket_incoming_spec(const char *address_and_port);
 int unix_socket_outgoing(const char *path);
 int unix_socket_incoming(const char *path);
 
-int nbd_negotiate(int csock, off_t size);
+int nbd_negotiate(int csock, off_t size, uint32_t flags);
 int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
                           off_t *size, size_t *blocksize);
-int nbd_init(int fd, int csock, off_t size, size_t blocksize);
+int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize);
 int nbd_send_request(int csock, struct nbd_request *request);
 int nbd_receive_reply(int csock, struct nbd_reply *reply);
 int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
-             off_t *offset, bool readonly, uint8_t *data, int data_size);
+             off_t *offset, uint32_t nbdflags, uint8_t *data, int data_size);
 int nbd_client(int fd);
 int nbd_disconnect(int fd);
 
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 0b25a4d..58a3e16 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -185,7 +185,7 @@ int main(int argc, char **argv)
     BlockDriverState *bs;
     off_t dev_offset = 0;
     off_t offset = 0;
-    bool readonly = false;
+    uint32_t nbdflags = 0;
     bool disconnect = false;
     const char *bindto = "0.0.0.0";
     int port = NBD_DEFAULT_PORT;
@@ -230,7 +230,6 @@ int main(int argc, char **argv)
     int nb_fds = 0;
     int max_fd;
     int persistent = 0;
-    uint32_t nbdflags;
 
     while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
         switch (ch) {
@@ -263,7 +262,7 @@ int main(int argc, char **argv)
             }
             break;
         case 'r':
-            readonly = true;
+            nbdflags |= NBD_FLAG_READ_ONLY;
             flags &= ~BDRV_O_RDWR;
             break;
         case 'P':
@@ -398,13 +397,13 @@ int main(int argc, char **argv)
             }
 
             ret = nbd_receive_negotiate(sock, NULL, &nbdflags,
-					&size, &blocksize);
+                                        &size, &blocksize);
             if (ret == -1) {
                 ret = 1;
                 goto out;
             }
 
-            ret = nbd_init(fd, sock, size, blocksize);
+            ret = nbd_init(fd, sock, nbdflags, size, blocksize);
             if (ret == -1) {
                 ret = 1;
                 goto out;
@@ -463,7 +462,7 @@ int main(int argc, char **argv)
         for (i = 1; i < nb_fds && ret; i++) {
             if (FD_ISSET(sharing_fds[i], &fds)) {
                 if (nbd_trip(bs, sharing_fds[i], fd_size, dev_offset,
-                    &offset, readonly, data, NBD_BUFFER_SIZE) != 0) {
+                    &offset, nbdflags, data, NBD_BUFFER_SIZE) != 0) {
                     close(sharing_fds[i]);
                     nb_fds--;
                     sharing_fds[i] = sharing_fds[nb_fds];
@@ -479,7 +478,7 @@ int main(int argc, char **argv)
                                              (struct sockaddr *)&addr,
                                              &addr_len);
                 if (sharing_fds[nb_fds] != -1 &&
-                    nbd_negotiate(sharing_fds[nb_fds], fd_size) != -1) {
+                    nbd_negotiate(sharing_fds[nb_fds], fd_size, nbdflags) != -1) {
                         if (sharing_fds[nb_fds] > max_fd)
                             max_fd = sharing_fds[nb_fds];
                         nb_fds++;
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 02/12] nbd: sync API definitions with upstream
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 01/12] nbd: support feature negotiation Paolo Bonzini
@ 2011-09-08 15:24 ` Paolo Bonzini
  2011-09-12 14:15   ` Kevin Wolf
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 03/12] nbd: support NBD_SET_FLAGS ioctl Paolo Bonzini
                   ` (11 subsequent siblings)
  13 siblings, 1 reply; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:24 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 nbd.c |    2 ++
 nbd.h |   11 ++++++++++-
 2 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/nbd.c b/nbd.c
index 07a8e53..9ed2239 100644
--- a/nbd.c
+++ b/nbd.c
@@ -66,6 +66,8 @@
 #define NBD_PRINT_DEBUG         _IO(0xab, 6)
 #define NBD_SET_SIZE_BLOCKS     _IO(0xab, 7)
 #define NBD_DISCONNECT          _IO(0xab, 8)
+#define NBD_SET_TIMEOUT         _IO(0xab, 9)
+#define NBD_SET_FLAGS           _IO(0xab, 10)
 
 #define NBD_OPT_EXPORT_NAME     (1 << 0)
 
diff --git a/nbd.h b/nbd.h
index 938a021..41eb3c8 100644
--- a/nbd.h
+++ b/nbd.h
@@ -41,11 +41,20 @@ struct nbd_reply {
 
 #define NBD_FLAG_HAS_FLAGS      (1 << 0)        /* Flags are there */
 #define NBD_FLAG_READ_ONLY      (1 << 1)        /* Device is read-only */
+#define NBD_FLAG_SEND_FLUSH     (1 << 2)        /* Send FLUSH */
+#define NBD_FLAG_SEND_FUA       (1 << 3)        /* Send FUA (Force Unit Access) */
+#define NBD_FLAG_ROTATIONAL     (1 << 4)        /* Use elevator algorithm - rotational media */
+#define NBD_FLAG_SEND_TRIM      (1 << 5)        /* Send TRIM (discard) */
+
+#define NBD_CMD_MASK_COMMAND	0x0000ffff
+#define NBD_CMD_FLAG_FUA	(1 << 16)
 
 enum {
     NBD_CMD_READ = 0,
     NBD_CMD_WRITE = 1,
-    NBD_CMD_DISC = 2
+    NBD_CMD_DISC = 2,
+    NBD_CMD_FLUSH = 3,
+    NBD_CMD_TRIM = 4
 };
 
 #define NBD_DEFAULT_PORT	10809
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 03/12] nbd: support NBD_SET_FLAGS ioctl
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 01/12] nbd: support feature negotiation Paolo Bonzini
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 02/12] nbd: sync API definitions with upstream Paolo Bonzini
@ 2011-09-08 15:24 ` Paolo Bonzini
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 04/12] nbd: add support for NBD_CMD_FLUSH Paolo Bonzini
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:24 UTC (permalink / raw)
  To: qemu-devel

The nbd kernel module cannot enable DISCARD requests unless it is
informed about them.  The flags field in the header is used for this,
and this patch adds support for it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 nbd.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/nbd.c b/nbd.c
index 9ed2239..30cd78f 100644
--- a/nbd.c
+++ b/nbd.c
@@ -377,6 +377,14 @@ int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
         }
     }
 
+    if (ioctl(fd, NBD_SET_FLAGS, flags) < 0
+        && errno != ENOTTY) {
+        int serrno = errno;
+        LOG("Failed setting flags");
+        errno = serrno;
+        return -1;
+    }
+
     TRACE("Clearing NBD socket");
 
     if (ioctl(fd, NBD_CLEAR_SOCK) == -1) {
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 04/12] nbd: add support for NBD_CMD_FLUSH
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
                   ` (2 preceding siblings ...)
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 03/12] nbd: support NBD_SET_FLAGS ioctl Paolo Bonzini
@ 2011-09-08 15:24 ` Paolo Bonzini
  2011-09-13 13:52   ` Kevin Wolf
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 05/12] nbd: add support for NBD_CMD_FLAG_FUA Paolo Bonzini
                   ` (9 subsequent siblings)
  13 siblings, 1 reply; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:24 UTC (permalink / raw)
  To: qemu-devel

Note for the brace police: the style in this commit and the following
is consistent with the rest of the file.  It is then fixed together with
the introduction of coroutines.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/nbd.c |   31 +++++++++++++++++++++++++++++++
 nbd.c       |   14 +++++++++++++-
 2 files changed, 44 insertions(+), 1 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index ffc57a9..4a195dc 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -237,6 +237,36 @@ static int nbd_write(BlockDriverState *bs, int64_t sector_num,
     return 0;
 }
 
+static int nbd_flush(BlockDriverState *bs)
+{
+    BDRVNBDState *s = bs->opaque;
+    struct nbd_request request;
+    struct nbd_reply reply;
+
+    if (!(s->nbdflags & NBD_FLAG_SEND_FLUSH)) {
+        return 0;
+    }
+
+    request.type = NBD_CMD_FLUSH;
+    request.handle = (uint64_t)(intptr_t)bs;
+    request.from = 0;
+    request.len = 0;
+
+    if (nbd_send_request(s->sock, &request) == -1)
+        return -errno;
+
+    if (nbd_receive_reply(s->sock, &reply) == -1)
+        return -errno;
+
+    if (reply.error !=0)
+        return -reply.error;
+
+    if (reply.handle != request.handle)
+        return -EIO;
+
+    return 0;
+}
+
 static void nbd_close(BlockDriverState *bs)
 {
     BDRVNBDState *s = bs->opaque;
@@ -260,6 +290,7 @@ static BlockDriver bdrv_nbd = {
     .bdrv_read		= nbd_read,
     .bdrv_write		= nbd_write,
     .bdrv_close		= nbd_close,
+    .bdrv_flush		= nbd_flush,
     .bdrv_getlength	= nbd_getlength,
     .protocol_name	= "nbd",
 };
diff --git a/nbd.c b/nbd.c
index 30cd78f..4dbbc62 100644
--- a/nbd.c
+++ b/nbd.c
@@ -193,7 +193,8 @@ int nbd_negotiate(int csock, off_t size, uint32_t flags)
     memcpy(buf, "NBDMAGIC", 8);
     cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
     cpu_to_be64w((uint64_t*)(buf + 16), size);
-    cpu_to_be32w((uint32_t*)(buf + 24), flags | NBD_FLAG_HAS_FLAGS);
+    cpu_to_be32w((uint32_t*)(buf + 24),
+                 flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH);
     memset(buf + 28, 0, 124);
 
     if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
@@ -682,6 +683,18 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
         TRACE("Request type is DISCONNECT");
         errno = 0;
         return 1;
+    case NBD_CMD_FLUSH:
+        TRACE("Request type is FLUSH");
+
+        if (bdrv_flush(bs) == -1) {
+            LOG("flush failed");
+            errno = EINVAL;
+            return -1;
+        }
+
+        if (nbd_send_reply(csock, &reply) == -1)
+            return -1;
+        break;
     default:
         LOG("invalid request type (%u) received", request.type);
         errno = EINVAL;
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 05/12] nbd: add support for NBD_CMD_FLAG_FUA
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
                   ` (3 preceding siblings ...)
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 04/12] nbd: add support for NBD_CMD_FLUSH Paolo Bonzini
@ 2011-09-08 15:24 ` Paolo Bonzini
  2011-09-13 13:55   ` Kevin Wolf
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 06/12] nbd: support NBD_CMD_TRIM in the server Paolo Bonzini
                   ` (8 subsequent siblings)
  13 siblings, 1 reply; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:24 UTC (permalink / raw)
  To: qemu-devel

The server can use it to issue a flush automatically after a
write.  The client can also use it to mimic a write-through
cache.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/nbd.c |    8 ++++++++
 nbd.c       |   13 +++++++++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 4a195dc..5a7812c 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -215,6 +215,10 @@ static int nbd_write(BlockDriverState *bs, int64_t sector_num,
     struct nbd_reply reply;
 
     request.type = NBD_CMD_WRITE;
+    if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
+        request.type |= NBD_CMD_FLAG_FUA;
+    }
+
     request.handle = (uint64_t)(intptr_t)bs;
     request.from = sector_num * 512;;
     request.len = nb_sectors * 512;
@@ -248,6 +252,10 @@ static int nbd_flush(BlockDriverState *bs)
     }
 
     request.type = NBD_CMD_FLUSH;
+    if (s->nbdflags & NBD_FLAG_SEND_FUA) {
+        request.type |= NBD_CMD_FLAG_FUA;
+    }
+
     request.handle = (uint64_t)(intptr_t)bs;
     request.from = 0;
     request.len = 0;
diff --git a/nbd.c b/nbd.c
index 4dbbc62..b65fb4a 100644
--- a/nbd.c
+++ b/nbd.c
@@ -194,7 +194,8 @@ int nbd_negotiate(int csock, off_t size, uint32_t flags)
     cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
     cpu_to_be64w((uint64_t*)(buf + 16), size);
     cpu_to_be32w((uint32_t*)(buf + 24),
-                 flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH);
+                 flags | NBD_FLAG_HAS_FLAGS |
+                 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
     memset(buf + 28, 0, 124);
 
     if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
@@ -614,7 +615,7 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
     reply.handle = request.handle;
     reply.error = 0;
 
-    switch (request.type) {
+    switch (request.type & NBD_CMD_MASK_COMMAND) {
     case NBD_CMD_READ:
         TRACE("Request type is READ");
 
@@ -674,6 +675,14 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
             }
 
             *offset += request.len;
+
+            if (request.type & NBD_CMD_FLAG_FUA) {
+                if (bdrv_flush(bs) == -1) {
+                    LOG("flush failed");
+                    errno = EINVAL;
+                    return -1;
+                }
+            }
         }
 
         if (nbd_send_reply(csock, &reply) == -1)
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 06/12] nbd: support NBD_CMD_TRIM in the server
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
                   ` (4 preceding siblings ...)
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 05/12] nbd: add support for NBD_CMD_FLAG_FUA Paolo Bonzini
@ 2011-09-08 15:24 ` Paolo Bonzini
  2011-09-13 13:58   ` Kevin Wolf
  2011-09-14 15:44   ` Christoph Hellwig
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 07/12] sheepdog: add coroutine_fn markers Paolo Bonzini
                   ` (7 subsequent siblings)
  13 siblings, 2 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:24 UTC (permalink / raw)
  To: qemu-devel

Map it to bdrv_discard.  The server can now expose NBD_FLAG_SEND_TRIM.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/nbd.c |   31 +++++++++++++++++++++++++++++++
 nbd.c       |    9 ++++++++-
 2 files changed, 39 insertions(+), 1 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 5a7812c..964caa8 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -275,6 +275,36 @@ static int nbd_flush(BlockDriverState *bs)
     return 0;
 }
 
+static int nbd_discard(BlockDriverState *bs, int64_t sector_num,
+                       int nb_sectors)
+{
+    BDRVNBDState *s = bs->opaque;
+    struct nbd_request request;
+    struct nbd_reply reply;
+
+    if (!(s->nbdflags & NBD_FLAG_SEND_TRIM)) {
+        return 0;
+    }
+    request.type = NBD_CMD_TRIM;
+    request.handle = (uint64_t)(intptr_t)bs;
+    request.from = sector_num * 512;;
+    request.len = nb_sectors * 512;
+
+    if (nbd_send_request(s->sock, &request) == -1)
+        return -errno;
+
+    if (nbd_receive_reply(s->sock, &reply) == -1)
+        return -errno;
+
+    if (reply.error !=0)
+        return -reply.error;
+
+    if (reply.handle != request.handle)
+        return -EIO;
+
+    return 0;
+}
+
 static void nbd_close(BlockDriverState *bs)
 {
     BDRVNBDState *s = bs->opaque;
@@ -299,6 +329,7 @@ static BlockDriver bdrv_nbd = {
     .bdrv_write		= nbd_write,
     .bdrv_close		= nbd_close,
     .bdrv_flush		= nbd_flush,
+    .bdrv_discard	= nbd_discard,
     .bdrv_getlength	= nbd_getlength,
     .protocol_name	= "nbd",
 };
diff --git a/nbd.c b/nbd.c
index b65fb4a..f089904 100644
--- a/nbd.c
+++ b/nbd.c
@@ -194,7 +194,7 @@ int nbd_negotiate(int csock, off_t size, uint32_t flags)
     cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
     cpu_to_be64w((uint64_t*)(buf + 16), size);
     cpu_to_be32w((uint32_t*)(buf + 24),
-                 flags | NBD_FLAG_HAS_FLAGS |
+                 flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
                  NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
     memset(buf + 28, 0, 124);
 
@@ -703,6 +703,13 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
         if (nbd_send_reply(csock, &reply) == -1)
             return -1;
         break;
+    case NBD_CMD_TRIM:
+        TRACE("Request type is TRIM");
+        bdrv_discard(bs, (request.from + dev_offset) / 512,
+                     request.len / 512);
+        if (nbd_send_reply(csock, &reply) == -1)
+            return -1;
+        break;
     default:
         LOG("invalid request type (%u) received", request.type);
         errno = EINVAL;
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 07/12] sheepdog: add coroutine_fn markers
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
                   ` (5 preceding siblings ...)
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 06/12] nbd: support NBD_CMD_TRIM in the server Paolo Bonzini
@ 2011-09-08 15:25 ` Paolo Bonzini
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 08/12] add socket_set_block Paolo Bonzini
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: MORITA Kazutaka

This makes the following patch easier to review.

Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/sheepdog.c |   14 +++++++-------
 1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index c1f6e07..af696a5 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -396,7 +396,7 @@ static inline int free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
     return !QLIST_EMPTY(&acb->aioreq_head);
 }
 
-static void sd_finish_aiocb(SheepdogAIOCB *acb)
+static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
 {
     if (!acb->canceled) {
         qemu_coroutine_enter(acb->coroutine, NULL);
@@ -735,7 +735,7 @@ out:
     return ret;
 }
 
-static int add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
+static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
                            struct iovec *iov, int niov, int create,
                            enum AIOCBState aiocb_type);
 
@@ -743,7 +743,7 @@ static int add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
  * This function searchs pending requests to the object `oid', and
  * sends them.
  */
-static void send_pending_req(BDRVSheepdogState *s, uint64_t oid, uint32_t id)
+static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid, uint32_t id)
 {
     AIOReq *aio_req, *next;
     SheepdogAIOCB *acb;
@@ -777,7 +777,7 @@ static void send_pending_req(BDRVSheepdogState *s, uint64_t oid, uint32_t id)
  * This function is registered as a fd handler, and called from the
  * main loop when s->fd is ready for reading responses.
  */
-static void aio_read_response(void *opaque)
+static void coroutine_fn aio_read_response(void *opaque)
 {
     SheepdogObjRsp rsp;
     BDRVSheepdogState *s = opaque;
@@ -1064,7 +1064,7 @@ out:
     return ret;
 }
 
-static int add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
+static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
                            struct iovec *iov, int niov, int create,
                            enum AIOCBState aiocb_type)
 {
@@ -1517,7 +1517,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
  * update metadata, this sends a write request to the vdi object.
  * Otherwise, this switches back to sd_co_readv/writev.
  */
-static void sd_write_done(SheepdogAIOCB *acb)
+static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
 {
     int ret;
     BDRVSheepdogState *s = acb->common.bs->opaque;
@@ -1615,7 +1615,7 @@ out:
  * Returns 1 when we need to wait a response, 0 when there is no sent
  * request and -errno in error cases.
  */
-static int sd_co_rw_vector(void *p)
+static int coroutine_fn sd_co_rw_vector(void *p)
 {
     SheepdogAIOCB *acb = p;
     int ret = 0;
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 08/12] add socket_set_block
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
                   ` (6 preceding siblings ...)
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 07/12] sheepdog: add coroutine_fn markers Paolo Bonzini
@ 2011-09-08 15:25 ` Paolo Bonzini
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 09/12] sheepdog: move coroutine send/recv function to generic code Paolo Bonzini
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: MORITA Kazutaka

Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 oslib-posix.c |    7 +++++++
 oslib-win32.c |    6 ++++++
 qemu_socket.h |    1 +
 3 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/oslib-posix.c b/oslib-posix.c
index 196099c..e13e6d4 100644
--- a/oslib-posix.c
+++ b/oslib-posix.c
@@ -91,6 +91,13 @@ void qemu_vfree(void *ptr)
     free(ptr);
 }
 
+void socket_set_block(int fd)
+{
+    int f;
+    f = fcntl(fd, F_GETFL);
+    fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
+}
+
 void socket_set_nonblock(int fd)
 {
     int f;
diff --git a/oslib-win32.c b/oslib-win32.c
index 5f0759f..5e3de7d 100644
--- a/oslib-win32.c
+++ b/oslib-win32.c
@@ -73,6 +73,12 @@ void qemu_vfree(void *ptr)
     VirtualFree(ptr, 0, MEM_RELEASE);
 }
 
+void socket_set_block(int fd)
+{
+    unsigned long opt = 0;
+    ioctlsocket(fd, FIONBIO, &opt);
+}
+
 void socket_set_nonblock(int fd)
 {
     unsigned long opt = 1;
diff --git a/qemu_socket.h b/qemu_socket.h
index 180e4db..9e32fac 100644
--- a/qemu_socket.h
+++ b/qemu_socket.h
@@ -35,6 +35,7 @@ int inet_aton(const char *cp, struct in_addr *ia);
 /* misc helpers */
 int qemu_socket(int domain, int type, int protocol);
 int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
+void socket_set_block(int fd);
 void socket_set_nonblock(int fd);
 int send_all(int fd, const void *buf, int len1);
 
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 09/12] sheepdog: move coroutine send/recv function to generic code
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
                   ` (7 preceding siblings ...)
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 08/12] add socket_set_block Paolo Bonzini
@ 2011-09-08 15:25 ` Paolo Bonzini
  2011-09-09  4:53   ` MORITA Kazutaka
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 10/12] block: add bdrv_co_flush support Paolo Bonzini
                   ` (4 subsequent siblings)
  13 siblings, 1 reply; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: MORITA Kazutaka

Outside coroutines, avoid busy waiting on EAGAIN by temporarily
making the socket blocking.

The API of qemu_recvv/qemu_sendv is slightly different from
do_readv/do_writev because they do not handle coroutines.  They
return the number of bytes written before encountering an
EAGAIN.  The specifics of yielding on EAGAIN are entirely in
qemu-coroutine.c.

Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/sheepdog.c |  221 +++++------------------------------------------------
 cutils.c         |  108 ++++++++++++++++++++++++++
 qemu-common.h    |    3 +
 qemu-coroutine.c |   71 +++++++++++++++++
 qemu-coroutine.h |   26 +++++++
 5 files changed, 229 insertions(+), 200 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index af696a5..188a8d8 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -443,129 +443,6 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
     return acb;
 }
 
-#ifdef _WIN32
-
-struct msghdr {
-    struct iovec *msg_iov;
-    size_t        msg_iovlen;
-};
-
-static ssize_t sendmsg(int s, const struct msghdr *msg, int flags)
-{
-    size_t size = 0;
-    char *buf, *p;
-    int i, ret;
-
-    /* count the msg size */
-    for (i = 0; i < msg->msg_iovlen; i++) {
-        size += msg->msg_iov[i].iov_len;
-    }
-    buf = g_malloc(size);
-
-    p = buf;
-    for (i = 0; i < msg->msg_iovlen; i++) {
-        memcpy(p, msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len);
-        p += msg->msg_iov[i].iov_len;
-    }
-
-    ret = send(s, buf, size, flags);
-
-    g_free(buf);
-    return ret;
-}
-
-static ssize_t recvmsg(int s, struct msghdr *msg, int flags)
-{
-    size_t size = 0;
-    char *buf, *p;
-    int i, ret;
-
-    /* count the msg size */
-    for (i = 0; i < msg->msg_iovlen; i++) {
-        size += msg->msg_iov[i].iov_len;
-    }
-    buf = g_malloc(size);
-
-    ret = qemu_recv(s, buf, size, flags);
-    if (ret < 0) {
-        goto out;
-    }
-
-    p = buf;
-    for (i = 0; i < msg->msg_iovlen; i++) {
-        memcpy(msg->msg_iov[i].iov_base, p, msg->msg_iov[i].iov_len);
-        p += msg->msg_iov[i].iov_len;
-    }
-out:
-    g_free(buf);
-    return ret;
-}
-
-#endif
-
-/*
- * Send/recv data with iovec buffers
- *
- * This function send/recv data from/to the iovec buffer directly.
- * The first `offset' bytes in the iovec buffer are skipped and next
- * `len' bytes are used.
- *
- * For example,
- *
- *   do_send_recv(sockfd, iov, len, offset, 1);
- *
- * is equals to
- *
- *   char *buf = malloc(size);
- *   iov_to_buf(iov, iovcnt, buf, offset, size);
- *   send(sockfd, buf, size, 0);
- *   free(buf);
- */
-static int do_send_recv(int sockfd, struct iovec *iov, int len, int offset,
-                        int write)
-{
-    struct msghdr msg;
-    int ret, diff;
-
-    memset(&msg, 0, sizeof(msg));
-    msg.msg_iov = iov;
-    msg.msg_iovlen = 1;
-
-    len += offset;
-
-    while (iov->iov_len < len) {
-        len -= iov->iov_len;
-
-        iov++;
-        msg.msg_iovlen++;
-    }
-
-    diff = iov->iov_len - len;
-    iov->iov_len -= diff;
-
-    while (msg.msg_iov->iov_len <= offset) {
-        offset -= msg.msg_iov->iov_len;
-
-        msg.msg_iov++;
-        msg.msg_iovlen--;
-    }
-
-    msg.msg_iov->iov_base = (char *) msg.msg_iov->iov_base + offset;
-    msg.msg_iov->iov_len -= offset;
-
-    if (write) {
-        ret = sendmsg(sockfd, &msg, 0);
-    } else {
-        ret = recvmsg(sockfd, &msg, 0);
-    }
-
-    msg.msg_iov->iov_base = (char *) msg.msg_iov->iov_base - offset;
-    msg.msg_iov->iov_len += offset;
-
-    iov->iov_len += diff;
-    return ret;
-}
-
 static int connect_to_sdog(const char *addr, const char *port)
 {
     char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
@@ -618,65 +495,6 @@ success:
     return fd;
 }
 
-static int do_readv_writev(int sockfd, struct iovec *iov, int len,
-                           int iov_offset, int write)
-{
-    int ret;
-again:
-    ret = do_send_recv(sockfd, iov, len, iov_offset, write);
-    if (ret < 0) {
-        if (errno == EINTR) {
-            goto again;
-        }
-        if (errno == EAGAIN) {
-            if (qemu_in_coroutine()) {
-                qemu_coroutine_yield();
-            }
-            goto again;
-        }
-        error_report("failed to recv a rsp, %s", strerror(errno));
-        return 1;
-    }
-
-    iov_offset += ret;
-    len -= ret;
-    if (len) {
-        goto again;
-    }
-
-    return 0;
-}
-
-static int do_readv(int sockfd, struct iovec *iov, int len, int iov_offset)
-{
-    return do_readv_writev(sockfd, iov, len, iov_offset, 0);
-}
-
-static int do_writev(int sockfd, struct iovec *iov, int len, int iov_offset)
-{
-    return do_readv_writev(sockfd, iov, len, iov_offset, 1);
-}
-
-static int do_read_write(int sockfd, void *buf, int len, int write)
-{
-    struct iovec iov;
-
-    iov.iov_base = buf;
-    iov.iov_len = len;
-
-    return do_readv_writev(sockfd, &iov, len, 0, write);
-}
-
-static int do_read(int sockfd, void *buf, int len)
-{
-    return do_read_write(sockfd, buf, len, 0);
-}
-
-static int do_write(int sockfd, void *buf, int len)
-{
-    return do_read_write(sockfd, buf, len, 1);
-}
-
 static int send_req(int sockfd, SheepdogReq *hdr, void *data,
                     unsigned int *wlen)
 {
@@ -691,10 +509,9 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data,
         iov[1].iov_len = *wlen;
     }
 
-    ret = do_writev(sockfd, iov, sizeof(*hdr) + *wlen, 0);
-    if (ret) {
+    ret = qemu_sendv(sockfd, iov, sizeof(*hdr) + *wlen, 0);
+    if (ret < 0) {
         error_report("failed to send a req, %s", strerror(errno));
-        ret = -1;
     }
 
     return ret;
@@ -704,17 +521,19 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
                   unsigned int *wlen, unsigned int *rlen)
 {
     int ret;
+    struct iovec iov;
 
+    socket_set_block(sockfd);
     ret = send_req(sockfd, hdr, data, wlen);
-    if (ret) {
-        ret = -1;
+    if (ret < 0) {
         goto out;
     }
 
-    ret = do_read(sockfd, hdr, sizeof(*hdr));
-    if (ret) {
+    iov.iov_base = hdr;
+    iov.iov_len = sizeof(*hdr);
+    ret = qemu_recvv(sockfd, &iov, sizeof(*hdr), 0);
+    if (ret < 0) {
         error_report("failed to get a rsp, %s", strerror(errno));
-        ret = -1;
         goto out;
     }
 
@@ -723,15 +542,17 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
     }
 
     if (*rlen) {
-        ret = do_read(sockfd, data, *rlen);
-        if (ret) {
+        iov.iov_base = data;
+        iov.iov_len = *rlen;
+        ret = qemu_recvv(sockfd, &iov, *rlen, 0);
+        if (ret < 0) {
             error_report("failed to get the data, %s", strerror(errno));
-            ret = -1;
             goto out;
         }
     }
     ret = 0;
 out:
+    socket_set_nonblock(sockfd);
     return ret;
 }
 
@@ -793,8 +614,8 @@ static void coroutine_fn aio_read_response(void *opaque)
     }
 
     /* read a header */
-    ret = do_read(fd, &rsp, sizeof(rsp));
-    if (ret) {
+    ret = qemu_co_recv(fd, &rsp, sizeof(rsp));
+    if (ret < 0) {
         error_report("failed to get the header, %s", strerror(errno));
         goto out;
     }
@@ -839,9 +660,9 @@ static void coroutine_fn aio_read_response(void *opaque)
         }
         break;
     case AIOCB_READ_UDATA:
-        ret = do_readv(fd, acb->qiov->iov, rsp.data_length,
-                       aio_req->iov_offset);
-        if (ret) {
+        ret = qemu_co_recvv(fd, acb->qiov->iov, rsp.data_length,
+                            aio_req->iov_offset);
+        if (ret < 0) {
             error_report("failed to get the data, %s", strerror(errno));
             goto out;
         }
@@ -1114,15 +935,15 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
     set_cork(s->fd, 1);
 
     /* send a header */
-    ret = do_write(s->fd, &hdr, sizeof(hdr));
-    if (ret) {
+    ret = qemu_co_send(s->fd, &hdr, sizeof(hdr));
+    if (ret < 0) {
         error_report("failed to send a req, %s", strerror(errno));
         return -EIO;
     }
 
     if (wlen) {
-        ret = do_writev(s->fd, iov, wlen, aio_req->iov_offset);
-        if (ret) {
+        ret = qemu_co_sendv(s->fd, iov, wlen, aio_req->iov_offset);
+        if (ret < 0) {
             error_report("failed to send a data, %s", strerror(errno));
             return -EIO;
         }
diff --git a/cutils.c b/cutils.c
index c91f887..9fe8070 100644
--- a/cutils.c
+++ b/cutils.c
@@ -25,6 +25,8 @@
 #include "host-utils.h"
 #include <math.h>
 
+#include "qemu_socket.h"
+
 void pstrcpy(char *buf, int buf_size, const char *str)
 {
     int c;
@@ -415,3 +419,107 @@ int64_t strtosz(const char *nptr, char **end)
 {
     return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB);
 }
+
+/*
+ * Send/recv data with iovec buffers
+ *
+ * This function sends/receives data from/to the iovec buffer directly.
+ * The first `offset' bytes in the iovec buffer are skipped and the
+ * next `len' bytes are used.
+ *
+ * For example,
+ *
+ *   do_sendv_recvv(sockfd, iov, len, offset, 1);
+ *
+ * is equivalent to
+ *
+ *   char *buf = malloc(size);
+ *   iov_to_buf(iov, iovcnt, buf, offset, size);
+ *   send(sockfd, buf, size, 0);
+ *   free(buf);
+ */
+static int do_sendv_recvv(int sockfd, struct iovec *iov, int len, int offset,
+                          int do_sendv)
+{
+    int ret, diff, iovlen;
+    struct iovec *iov_start;
+
+    iov_start = iov;
+    iovlen = 1;
+    len += offset;
+
+    while (iov->iov_len < len) {
+        len -= iov->iov_len;
+
+        iov++;
+        iovlen++;
+    }
+
+    diff = iov->iov_len - len;
+    iov->iov_len -= diff;
+
+    while (iov->iov_len <= offset) {
+        offset -= iov->iov_len;
+
+        iov++;
+        iovlen--;
+    }
+
+    iov_start->iov_base = (char *) iov->iov_base + offset;
+    iov_start->iov_len -= offset;
+
+    {
+#ifdef CONFIG_IOVEC
+        struct msghdr msg;
+        memset(&msg, 0, sizeof(msg));
+        msg.msg_iov = iov_start;
+        msg.msg_iovlen = iovlen;
+
+        do {
+            if (do_sendv) {
+                ret = sendmsg(sockfd, &msg, 0);
+            } else {
+                ret = recvmsg(sockfd, &msg, 0);
+            }
+        } while (ret == -1 && errno == EINTR);
+#else
+        struct iovec *p = iov_start;
+        ret = 0;
+        while (iovlen > 0) {
+            int rc;
+            if (do_sendv) {
+                rc = send(sockfd, p->iov_base, p->iov_len, 0);
+            } else {
+                rc = qemu_recv(sockfd, p->iov_base, p->iov_len, 0);
+            }
+            if (rc == -1) {
+                if (errno == EINTR) {
+                    continue;
+                }
+                if (ret == 0) {
+                    ret = -1;
+                }
+                break;
+            }
+            iovlen--, p++;
+            ret += rc;
+        }
+#endif
+    }
+
+    /* Undo the changes above */
+    iov_start->iov_base = (char *) iov->iov_base - offset;
+    iov_start->iov_len += offset;
+    iov->iov_len += diff;
+    return ret;
+}
+
+int qemu_recvv(int sockfd, struct iovec *iov, int len, int iov_offset)
+{
+    return do_sendv_recvv(sockfd, iov, len, iov_offset, 0);
+}
+
+int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset)
+{
+    return do_sendv_recvv(sockfd, iov, len, iov_offset, 1);
+}
diff --git a/qemu-common.h b/qemu-common.h
index 404c421..fc921cc 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -203,6 +203,9 @@ int qemu_pipe(int pipefd[2]);
 #define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags)
 #endif
 
+int qemu_recvv(int sockfd, struct iovec *iov, int len, int iov_offset);
+int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset);
+
 /* Error handling.  */
 
 void QEMU_NORETURN hw_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
diff --git a/qemu-coroutine.c b/qemu-coroutine.c
index 600be26..fd77251 100644
--- a/qemu-coroutine.c
+++ b/qemu-coroutine.c
@@ -73,3 +73,73 @@ void coroutine_fn qemu_coroutine_yield(void)
     self->caller = NULL;
     coroutine_swap(self, to);
 }
+
+int coroutine_fn qemu_co_recvv(int sockfd, struct iovec *iov,
+                               int len, int iov_offset)
+{
+    int total = 0;
+    int ret;
+    while (len) {
+        ret = qemu_recvv(sockfd, iov, len, iov_offset + total);
+        if (ret < 0) {
+            if (errno == EAGAIN) {
+                qemu_coroutine_yield();
+                ret = 0;
+            } else {
+                if (total == 0) {
+                    total = -1;
+                }
+                break;
+            }
+        }
+
+        total += ret, len -= ret;
+    }
+
+    return total;
+}
+
+int coroutine_fn qemu_co_sendv(int sockfd, struct iovec *iov,
+                               int len, int iov_offset)
+{
+    int total = 0;
+    int ret;
+    while (len) {
+        ret = qemu_sendv(sockfd, iov, len, iov_offset + total);
+        if (ret < 0) {
+            if (errno == EAGAIN) {
+                qemu_coroutine_yield();
+                ret = 0;
+            } else {
+                if (total == 0) {
+                    total = -1;
+                }
+                break;
+            }
+        }
+
+        total += ret, len -= ret;
+    }
+
+    return total;
+}
+
+int coroutine_fn qemu_co_recv(int sockfd, void *buf, int len)
+{
+    struct iovec iov;
+
+    iov.iov_base = buf;
+    iov.iov_len = len;
+
+    return qemu_co_recvv(sockfd, &iov, len, 0);
+}
+
+int coroutine_fn qemu_co_send(int sockfd, void *buf, int len)
+{
+    struct iovec iov;
+
+    iov.iov_base = buf;
+    iov.iov_len = len;
+
+    return qemu_co_sendv(sockfd, &iov, len, 0);
+}
diff --git a/qemu-coroutine.h b/qemu-coroutine.h
index b8fc4f4..0419e67 100644
--- a/qemu-coroutine.h
+++ b/qemu-coroutine.h
@@ -188,4 +188,30 @@ void qemu_co_rwlock_wrlock(CoRwlock *lock);
  */
 void qemu_co_rwlock_unlock(CoRwlock *lock);
 
+/**
+ * Sends an iovec (or optionally a part of it) down a socket, yielding
+ * when the socket is full.
+ */
+int coroutine_fn qemu_co_sendv(int sockfd, struct iovec *iov,
+                               int len, int iov_offset);
+
+/**
+ * Receives data into an iovec (or optionally into a part of it) from
+ * a socket, yielding when there is no data in the socket.
+ */
+int coroutine_fn qemu_co_recvv(int sockfd, struct iovec *iov,
+                               int len, int iov_offset);
+
+
+/**
+ * Sends a buffer down a socket, yielding when the socket is full.
+ */
+int coroutine_fn qemu_co_send(int sockfd, void *buf, int len);
+
+/**
+ * Receives data into a buffer from a socket, yielding when there
+ * is no data in the socket.
+ */
+int coroutine_fn qemu_co_recv(int sockfd, void *buf, int len);
+
 #endif /* QEMU_COROUTINE_H */
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 10/12] block: add bdrv_co_flush support
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
                   ` (8 preceding siblings ...)
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 09/12] sheepdog: move coroutine send/recv function to generic code Paolo Bonzini
@ 2011-09-08 15:25 ` Paolo Bonzini
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 11/12] nbd: switch to asynchronous operation Paolo Bonzini
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:25 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block.c     |   53 ++++++++++++++++++++++++++++++++++++++++++-----------
 block_int.h |    1 +
 2 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/block.c b/block.c
index 43742b7..3f32f75 100644
--- a/block.c
+++ b/block.c
@@ -64,6 +64,9 @@ static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
+static BlockDriverAIOCB *bdrv_co_aio_flush_em(BlockDriverState *bs,
+                                              BlockDriverCompletionFunc *cb,
+                                              void *opaque);
 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
@@ -204,8 +207,18 @@ void bdrv_register(BlockDriver *bdrv)
         }
     }
 
-    if (!bdrv->bdrv_aio_flush)
-        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
+    if (bdrv->bdrv_aio_flush && !bdrv->bdrv_co_flush) {
+        /* Emulate coroutines by AIO */
+        bdrv->bdrv_co_flush = bdrv_co_flush_em;
+    }
+    if (!bdrv->bdrv_aio_flush) {
+        /* Emulate AIO by either coroutines or sync */
+        if (bdrv->bdrv_co_flush) {
+            bdrv->bdrv_aio_flush = bdrv_co_aio_flush_em;
+        } else {
+            bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
+        }
+    }
 
     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
 }
@@ -980,11 +993,6 @@ static inline bool bdrv_has_async_rw(BlockDriver *drv)
         || drv->bdrv_aio_readv != bdrv_aio_readv_em;
 }
 
-static inline bool bdrv_has_async_flush(BlockDriver *drv)
-{
-    return drv->bdrv_aio_flush != bdrv_aio_flush_em;
-}
-
 /* return < 0 if error. See bdrv_write() for the return codes */
 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
               uint8_t *buf, int nb_sectors)
@@ -1713,8 +1721,8 @@ int bdrv_flush(BlockDriverState *bs)
         return 0;
     }
 
-    if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
-        return bdrv_co_flush_em(bs);
+    if (bs->drv && bs->drv->bdrv_co_flush && qemu_in_coroutine()) {
+        return bs->drv->bdrv_co_flush(bs);
     }
 
     if (bs->drv && bs->drv->bdrv_flush) {
@@ -2729,7 +2737,7 @@ static AIOPool bdrv_em_co_aio_pool = {
     .cancel             = bdrv_aio_co_cancel_em,
 };
 
-static void bdrv_co_rw_bh(void *opaque)
+static void bdrv_co_em_bh(void *opaque)
 {
     BlockDriverAIOCBCoroutine *acb = opaque;
 
@@ -2751,7 +2759,7 @@ static void coroutine_fn bdrv_co_rw(void *opaque)
             acb->req.nb_sectors, acb->req.qiov);
     }
 
-    acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
+    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
     qemu_bh_schedule(acb->bh);
 }
 
@@ -2794,6 +2802,29 @@ static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
                                  true);
 }
 
+static void coroutine_fn bdrv_co_flush(void *opaque)
+{
+    BlockDriverAIOCBCoroutine *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+
+    acb->req.error = bs->drv->bdrv_co_flush(bs);
+    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+    qemu_bh_schedule(acb->bh);
+}
+
+static BlockDriverAIOCB *bdrv_co_aio_flush_em(BlockDriverState *bs,
+                                              BlockDriverCompletionFunc *cb,
+                                              void *opaque)
+{
+    Coroutine *co;
+    BlockDriverAIOCBCoroutine *acb;
+
+    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
+    co = qemu_coroutine_create(bdrv_co_flush);
+    qemu_coroutine_enter(co, acb);
+
+    return &acb->common;
+}
 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
diff --git a/block_int.h b/block_int.h
index 8a72b80..b0cd5ea 100644
--- a/block_int.h
+++ b/block_int.h
@@ -83,6 +83,7 @@ struct BlockDriver {
         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
     int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+    int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
 
     int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs,
         int num_reqs);
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 11/12] nbd: switch to asynchronous operation
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
                   ` (9 preceding siblings ...)
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 10/12] block: add bdrv_co_flush support Paolo Bonzini
@ 2011-09-08 15:25 ` Paolo Bonzini
  2011-09-09 14:52   ` Nicholas Thomas
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 12/12] nbd: split requests Paolo Bonzini
                   ` (2 subsequent siblings)
  13 siblings, 1 reply; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:25 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/nbd.c |  167 ++++++++++++++++++++++++++++++++++++++--------------------
 nbd.c       |    8 +++
 2 files changed, 117 insertions(+), 58 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 964caa8..5a75263 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -52,6 +52,9 @@ typedef struct BDRVNBDState {
     size_t blocksize;
     char *export_name; /* An NBD server may export several devices */
 
+    CoMutex mutex;
+    Coroutine *coroutine;
+
     /* If it begins with  '/', this is a UNIX domain socket. Otherwise,
      * it's a string of the form <hostname|ip4|\[ip6\]>:port
      */
@@ -104,6 +107,37 @@ out:
     return err;
 }
 
+static void nbd_coroutine_start(BDRVNBDState *s)
+{
+    qemu_co_mutex_lock(&s->mutex);
+    s->coroutine = qemu_coroutine_self();
+}
+
+static void nbd_coroutine_enter(void *opaque)
+{
+    BDRVNBDState *s = opaque;
+    qemu_coroutine_enter(s->coroutine, NULL);
+}
+
+static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request)
+{
+    qemu_aio_set_fd_handler(s->sock, NULL, nbd_coroutine_enter, NULL, NULL, s);
+    return nbd_send_request(s->sock, request);
+}
+
+static int nbd_co_receive_reply(BDRVNBDState *s, struct nbd_reply *reply)
+{
+    qemu_aio_set_fd_handler(s->sock, nbd_coroutine_enter, NULL, NULL, NULL, s);
+    return nbd_receive_reply(s->sock, reply);
+}
+
+static void nbd_coroutine_end(BDRVNBDState *s)
+{
+    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL, s);
+    s->coroutine = NULL;
+    qemu_co_mutex_unlock(&s->mutex);
+}
+
 static int nbd_establish_connection(BlockDriverState *bs)
 {
     BDRVNBDState *s = bs->opaque;
@@ -163,6 +197,8 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
     BDRVNBDState *s = bs->opaque;
     int result;
 
+    qemu_co_mutex_init(&s->mutex);
+
     /* Pop the config into our state object. Exit if invalid. */
     result = nbd_config(s, filename, flags);
     if (result != 0) {
@@ -177,8 +213,8 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
     return result;
 }
 
-static int nbd_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
+static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
+                        int nb_sectors, QEMUIOVector *qiov)
 {
     BDRVNBDState *s = bs->opaque;
     struct nbd_request request;
@@ -189,30 +225,39 @@ static int nbd_read(BlockDriverState *bs, int64_t sector_num,
     request.from = sector_num * 512;;
     request.len = nb_sectors * 512;
 
-    if (nbd_send_request(s->sock, &request) == -1)
-        return -errno;
-
-    if (nbd_receive_reply(s->sock, &reply) == -1)
-        return -errno;
-
-    if (reply.error !=0)
-        return -reply.error;
-
-    if (reply.handle != request.handle)
-        return -EIO;
+    nbd_coroutine_start(s);
+    if (nbd_co_send_request(s, &request) == -1) {
+        reply.error = errno;
+        goto done;
+    }
+    if (nbd_co_receive_reply(s, &reply) == -1) {
+        reply.error = errno;
+        goto done;
+    }
+    if (reply.error != 0) {
+        goto done;
+    }
+    if (reply.handle != request.handle) {
+        reply.error = EIO;
+        goto done;
+    }
+    if (qemu_co_recvv(s->sock, qiov->iov, request.len, 0) != request.len) {
+        reply.error = EIO;
+    }
 
-    if (nbd_wr_sync(s->sock, buf, request.len, 1) != request.len)
-        return -EIO;
+done:
+    nbd_coroutine_end(s);
+    return -reply.error;
 
-    return 0;
 }
 
-static int nbd_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
+static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
+                         int nb_sectors, QEMUIOVector *qiov)
 {
     BDRVNBDState *s = bs->opaque;
     struct nbd_request request;
     struct nbd_reply reply;
+    int ret;
 
     request.type = NBD_CMD_WRITE;
     if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
@@ -223,25 +268,30 @@ static int nbd_write(BlockDriverState *bs, int64_t sector_num,
     request.from = sector_num * 512;;
     request.len = nb_sectors * 512;
 
-    if (nbd_send_request(s->sock, &request) == -1)
-        return -errno;
-
-    if (nbd_wr_sync(s->sock, (uint8_t*)buf, request.len, 0) != request.len)
-        return -EIO;
-
-    if (nbd_receive_reply(s->sock, &reply) == -1)
-        return -errno;
-
-    if (reply.error !=0)
-        return -reply.error;
-
-    if (reply.handle != request.handle)
-        return -EIO;
+    nbd_coroutine_start(s);
+    if (nbd_co_send_request(s, &request) == -1) {
+        reply.error = errno;
+        goto done;
+    }
+    ret = qemu_co_sendv(s->sock, qiov->iov, request.len, 0);
+    if (ret != request.len) {
+        reply.error = EIO;
+        goto done;
+    }
+    if (nbd_co_receive_reply(s, &reply) == -1) {
+        reply.error = errno;
+        goto done;
+    }
+    if (reply.handle != request.handle) {
+        reply.error = EIO;
+    }
 
-    return 0;
+done:
+    nbd_coroutine_end(s);
+    return -reply.error;
 }
 
-static int nbd_flush(BlockDriverState *bs)
+static int nbd_co_flush(BlockDriverState *bs)
 {
     BDRVNBDState *s = bs->opaque;
     struct nbd_request request;
@@ -260,19 +310,22 @@ static int nbd_flush(BlockDriverState *bs)
     request.from = 0;
     request.len = 0;
 
-    if (nbd_send_request(s->sock, &request) == -1)
-        return -errno;
-
-    if (nbd_receive_reply(s->sock, &reply) == -1)
-        return -errno;
-
-    if (reply.error !=0)
-        return -reply.error;
-
-    if (reply.handle != request.handle)
-        return -EIO;
+    nbd_coroutine_start(s);
+    if (nbd_co_send_request(s, &request) == -1) {
+        reply.error = errno;
+        goto done;
+    }
+    if (nbd_co_receive_reply(s, &reply) == -1) {
+        reply.error = errno;
+        goto done;
+    }
+    if (reply.error == 0 && reply.handle != request.handle) {
+        reply.error = EIO;
+    }
 
-    return 0;
+done:
+    nbd_coroutine_end(s);
+    return -reply.error;
 }
 
 static int nbd_discard(BlockDriverState *bs, int64_t sector_num,
@@ -290,19 +343,17 @@ static int nbd_discard(BlockDriverState *bs, int64_t sector_num,
     request.from = sector_num * 512;;
     request.len = nb_sectors * 512;
 
-    if (nbd_send_request(s->sock, &request) == -1)
+    if (nbd_send_request(s->sock, &request) == -1) {
         return -errno;
-
-    if (nbd_receive_reply(s->sock, &reply) == -1)
+    }
+    if (nbd_receive_reply(s->sock, &reply) == -1) {
         return -errno;
-
-    if (reply.error !=0)
-        return -reply.error;
-
-    if (reply.handle != request.handle)
+    }
+    if (reply.error == 0 && reply.handle != request.handle) {
         return -EIO;
+    }
 
-    return 0;
+    return -reply.error;
 }
 
 static void nbd_close(BlockDriverState *bs)
@@ -325,10 +376,10 @@ static BlockDriver bdrv_nbd = {
     .format_name	= "nbd",
     .instance_size	= sizeof(BDRVNBDState),
     .bdrv_file_open	= nbd_open,
-    .bdrv_read		= nbd_read,
-    .bdrv_write		= nbd_write,
+    .bdrv_co_readv	= nbd_co_readv,
+    .bdrv_co_writev	= nbd_co_writev,
     .bdrv_close		= nbd_close,
-    .bdrv_flush		= nbd_flush,
+    .bdrv_co_flush	= nbd_co_flush,
     .bdrv_discard	= nbd_discard,
     .bdrv_getlength	= nbd_getlength,
     .protocol_name	= "nbd",
diff --git a/nbd.c b/nbd.c
index f089904..2f4c6b3 100644
--- a/nbd.c
+++ b/nbd.c
@@ -80,6 +80,14 @@ size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
 {
     size_t offset = 0;
 
+    if (qemu_in_coroutine()) {
+        if (do_read) {
+            return qemu_co_recv(fd, buffer, size);
+        } else {
+            return qemu_co_send(fd, buffer, size);
+        }
+    }
+
     while (offset < size) {
         ssize_t len;
 
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH 12/12] nbd: split requests
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
                   ` (10 preceding siblings ...)
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 11/12] nbd: switch to asynchronous operation Paolo Bonzini
@ 2011-09-08 15:25 ` Paolo Bonzini
  2011-09-09 14:52   ` Nicholas Thomas
  2011-09-09  9:00 ` [Qemu-devel] [PATCH 00/12] nbd improvements Kevin Wolf
  2011-09-14  9:50 ` Kevin Wolf
  13 siblings, 1 reply; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-08 15:25 UTC (permalink / raw)
  To: qemu-devel

qemu-nbd has a limit of slightly less than 1M per request.  Work
around this in the nbd block driver.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/nbd.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 5a75263..468a517 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -213,8 +213,9 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
     return result;
 }
 
-static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov)
+static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
+                          int nb_sectors, QEMUIOVector *qiov,
+                          int offset)
 {
     BDRVNBDState *s = bs->opaque;
     struct nbd_request request;
@@ -241,7 +242,7 @@ static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
         reply.error = EIO;
         goto done;
     }
-    if (qemu_co_recvv(s->sock, qiov->iov, request.len, 0) != request.len) {
+    if (qemu_co_recvv(s->sock, qiov->iov, request.len, offset) != request.len) {
         reply.error = EIO;
     }
 
@@ -251,8 +252,9 @@ done:
 
 }
 
-static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov)
+static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
+                           int nb_sectors, QEMUIOVector *qiov,
+                           int offset)
 {
     BDRVNBDState *s = bs->opaque;
     struct nbd_request request;
@@ -273,7 +275,7 @@ static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
         reply.error = errno;
         goto done;
     }
-    ret = qemu_co_sendv(s->sock, qiov->iov, request.len, 0);
+    ret = qemu_co_sendv(s->sock, qiov->iov, request.len, offset);
     if (ret != request.len) {
         reply.error = EIO;
         goto done;
@@ -291,6 +293,44 @@ done:
     return -reply.error;
 }
 
+/* qemu-nbd has a limit of slightly less than 1M per request.  For safety,
+ * transfer at most 512K per request. */
+#define NBD_MAX_SECTORS 1024
+
+static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
+                        int nb_sectors, QEMUIOVector *qiov)
+{
+    int offset = 0;
+    int ret;
+    while (nb_sectors > NBD_MAX_SECTORS) {
+        ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
+        if (ret < 0) {
+            return ret;
+        }
+        offset += NBD_MAX_SECTORS * 512;
+        sector_num += NBD_MAX_SECTORS;
+        nb_sectors -= NBD_MAX_SECTORS;
+    }
+    return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
+}
+
+static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
+                         int nb_sectors, QEMUIOVector *qiov)
+{
+    int offset = 0;
+    int ret;
+    while (nb_sectors > NBD_MAX_SECTORS) {
+        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
+        if (ret < 0) {
+            return ret;
+        }
+        offset += NBD_MAX_SECTORS * 512;
+        sector_num += NBD_MAX_SECTORS;
+        nb_sectors -= NBD_MAX_SECTORS;
+    }
+    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
+}
+
 static int nbd_co_flush(BlockDriverState *bs)
 {
     BDRVNBDState *s = bs->opaque;
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 09/12] sheepdog: move coroutine send/recv function to generic code
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 09/12] sheepdog: move coroutine send/recv function to generic code Paolo Bonzini
@ 2011-09-09  4:53   ` MORITA Kazutaka
  2011-09-09  8:11     ` [Qemu-devel] [PATCH v2 " Paolo Bonzini
  0 siblings, 1 reply; 42+ messages in thread
From: MORITA Kazutaka @ 2011-09-09  4:53 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, MORITA Kazutaka

At Thu,  8 Sep 2011 17:25:02 +0200,
Paolo Bonzini wrote:
> 
> Outside coroutines, avoid busy waiting on EAGAIN by temporarily
> making the socket blocking.
> 
> The API of qemu_recvv/qemu_sendv is slightly different from
> do_readv/do_writev because they do not handle coroutines.  It
> returns the number of bytes written before encountering an
> EAGAIN.  The specificity of yielding on EAGAIN is entirely in
> qemu-coroutine.c.
> 
> Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block/sheepdog.c |  221 +++++------------------------------------------------
>  cutils.c         |  108 ++++++++++++++++++++++++++
>  qemu-common.h    |    3 +
>  qemu-coroutine.c |   71 +++++++++++++++++
>  qemu-coroutine.h |   26 +++++++
>  5 files changed, 229 insertions(+), 200 deletions(-)
> 
> diff --git a/block/sheepdog.c b/block/sheepdog.c
> index af696a5..188a8d8 100644
> --- a/block/sheepdog.c
> +++ b/block/sheepdog.c
> @@ -443,129 +443,6 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
>      return acb;
>  }
>  
> -#ifdef _WIN32
> -
> -struct msghdr {
> -    struct iovec *msg_iov;
> -    size_t        msg_iovlen;
> -};
> -
> -static ssize_t sendmsg(int s, const struct msghdr *msg, int flags)
> -{
> -    size_t size = 0;
> -    char *buf, *p;
> -    int i, ret;
> -
> -    /* count the msg size */
> -    for (i = 0; i < msg->msg_iovlen; i++) {
> -        size += msg->msg_iov[i].iov_len;
> -    }
> -    buf = g_malloc(size);
> -
> -    p = buf;
> -    for (i = 0; i < msg->msg_iovlen; i++) {
> -        memcpy(p, msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len);
> -        p += msg->msg_iov[i].iov_len;
> -    }
> -
> -    ret = send(s, buf, size, flags);
> -
> -    g_free(buf);
> -    return ret;
> -}
> -
> -static ssize_t recvmsg(int s, struct msghdr *msg, int flags)
> -{
> -    size_t size = 0;
> -    char *buf, *p;
> -    int i, ret;
> -
> -    /* count the msg size */
> -    for (i = 0; i < msg->msg_iovlen; i++) {
> -        size += msg->msg_iov[i].iov_len;
> -    }
> -    buf = g_malloc(size);
> -
> -    ret = qemu_recv(s, buf, size, flags);
> -    if (ret < 0) {
> -        goto out;
> -    }
> -
> -    p = buf;
> -    for (i = 0; i < msg->msg_iovlen; i++) {
> -        memcpy(msg->msg_iov[i].iov_base, p, msg->msg_iov[i].iov_len);
> -        p += msg->msg_iov[i].iov_len;
> -    }
> -out:
> -    g_free(buf);
> -    return ret;
> -}
> -
> -#endif
> -
> -/*
> - * Send/recv data with iovec buffers
> - *
> - * This function send/recv data from/to the iovec buffer directly.
> - * The first `offset' bytes in the iovec buffer are skipped and next
> - * `len' bytes are used.
> - *
> - * For example,
> - *
> - *   do_send_recv(sockfd, iov, len, offset, 1);
> - *
> - * is equals to
> - *
> - *   char *buf = malloc(size);
> - *   iov_to_buf(iov, iovcnt, buf, offset, size);
> - *   send(sockfd, buf, size, 0);
> - *   free(buf);
> - */
> -static int do_send_recv(int sockfd, struct iovec *iov, int len, int offset,
> -                        int write)
> -{
> -    struct msghdr msg;
> -    int ret, diff;
> -
> -    memset(&msg, 0, sizeof(msg));
> -    msg.msg_iov = iov;
> -    msg.msg_iovlen = 1;
> -
> -    len += offset;
> -
> -    while (iov->iov_len < len) {
> -        len -= iov->iov_len;
> -
> -        iov++;
> -        msg.msg_iovlen++;
> -    }
> -
> -    diff = iov->iov_len - len;
> -    iov->iov_len -= diff;
> -
> -    while (msg.msg_iov->iov_len <= offset) {
> -        offset -= msg.msg_iov->iov_len;
> -
> -        msg.msg_iov++;
> -        msg.msg_iovlen--;
> -    }
> -
> -    msg.msg_iov->iov_base = (char *) msg.msg_iov->iov_base + offset;
> -    msg.msg_iov->iov_len -= offset;
> -
> -    if (write) {
> -        ret = sendmsg(sockfd, &msg, 0);
> -    } else {
> -        ret = recvmsg(sockfd, &msg, 0);
> -    }
> -
> -    msg.msg_iov->iov_base = (char *) msg.msg_iov->iov_base - offset;
> -    msg.msg_iov->iov_len += offset;
> -
> -    iov->iov_len += diff;
> -    return ret;
> -}
> -
>  static int connect_to_sdog(const char *addr, const char *port)
>  {
>      char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
> @@ -618,65 +495,6 @@ success:
>      return fd;
>  }
>  
> -static int do_readv_writev(int sockfd, struct iovec *iov, int len,
> -                           int iov_offset, int write)
> -{
> -    int ret;
> -again:
> -    ret = do_send_recv(sockfd, iov, len, iov_offset, write);
> -    if (ret < 0) {
> -        if (errno == EINTR) {
> -            goto again;
> -        }
> -        if (errno == EAGAIN) {
> -            if (qemu_in_coroutine()) {
> -                qemu_coroutine_yield();
> -            }
> -            goto again;
> -        }
> -        error_report("failed to recv a rsp, %s", strerror(errno));
> -        return 1;
> -    }
> -
> -    iov_offset += ret;
> -    len -= ret;
> -    if (len) {
> -        goto again;
> -    }
> -
> -    return 0;
> -}
> -
> -static int do_readv(int sockfd, struct iovec *iov, int len, int iov_offset)
> -{
> -    return do_readv_writev(sockfd, iov, len, iov_offset, 0);
> -}
> -
> -static int do_writev(int sockfd, struct iovec *iov, int len, int iov_offset)
> -{
> -    return do_readv_writev(sockfd, iov, len, iov_offset, 1);
> -}
> -
> -static int do_read_write(int sockfd, void *buf, int len, int write)
> -{
> -    struct iovec iov;
> -
> -    iov.iov_base = buf;
> -    iov.iov_len = len;
> -
> -    return do_readv_writev(sockfd, &iov, len, 0, write);
> -}
> -
> -static int do_read(int sockfd, void *buf, int len)
> -{
> -    return do_read_write(sockfd, buf, len, 0);
> -}
> -
> -static int do_write(int sockfd, void *buf, int len)
> -{
> -    return do_read_write(sockfd, buf, len, 1);
> -}
> -
>  static int send_req(int sockfd, SheepdogReq *hdr, void *data,
>                      unsigned int *wlen)
>  {
> @@ -691,10 +509,9 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data,
>          iov[1].iov_len = *wlen;
>      }
>  
> -    ret = do_writev(sockfd, iov, sizeof(*hdr) + *wlen, 0);
> -    if (ret) {
> +    ret = qemu_sendv(sockfd, iov, sizeof(*hdr) + *wlen, 0);
> +    if (ret < 0) {
>          error_report("failed to send a req, %s", strerror(errno));
> -        ret = -1;
>      }
>  
>      return ret;
> @@ -704,17 +521,19 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
>                    unsigned int *wlen, unsigned int *rlen)
>  {
>      int ret;
> +    struct iovec iov;
>  
> +    socket_set_block(sockfd);
>      ret = send_req(sockfd, hdr, data, wlen);
> -    if (ret) {
> -        ret = -1;
> +    if (ret < 0) {
>          goto out;
>      }
>  
> -    ret = do_read(sockfd, hdr, sizeof(*hdr));
> -    if (ret) {
> +    iov.iov_base = hdr;
> +    iov.iov_len = sizeof(*hdr);
> +    ret = qemu_recvv(sockfd, &iov, sizeof(*hdr), 0);
> +    if (ret < 0) {
>          error_report("failed to get a rsp, %s", strerror(errno));
> -        ret = -1;
>          goto out;
>      }
>  
> @@ -723,15 +542,17 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
>      }
>  
>      if (*rlen) {
> -        ret = do_read(sockfd, data, *rlen);
> -        if (ret) {
> +        iov.iov_base = data;
> +        iov.iov_len = *rlen;
> +        ret = qemu_recvv(sockfd, &iov, *rlen, 0);
> +        if (ret < 0) {
>              error_report("failed to get the data, %s", strerror(errno));
> -            ret = -1;
>              goto out;
>          }
>      }
>      ret = 0;
>  out:
> +    socket_set_nonblock(sockfd);
>      return ret;
>  }
>  
> @@ -793,8 +614,8 @@ static void coroutine_fn aio_read_response(void *opaque)
>      }
>  
>      /* read a header */
> -    ret = do_read(fd, &rsp, sizeof(rsp));
> -    if (ret) {
> +    ret = qemu_co_recv(fd, &rsp, sizeof(rsp));
> +    if (ret < 0) {
>          error_report("failed to get the header, %s", strerror(errno));
>          goto out;
>      }
> @@ -839,9 +660,9 @@ static void coroutine_fn aio_read_response(void *opaque)
>          }
>          break;
>      case AIOCB_READ_UDATA:
> -        ret = do_readv(fd, acb->qiov->iov, rsp.data_length,
> -                       aio_req->iov_offset);
> -        if (ret) {
> +        ret = qemu_co_recvv(fd, acb->qiov->iov, rsp.data_length,
> +                            aio_req->iov_offset);
> +        if (ret < 0) {
>              error_report("failed to get the data, %s", strerror(errno));
>              goto out;
>          }
> @@ -1114,15 +935,15 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
>      set_cork(s->fd, 1);
>  
>      /* send a header */
> -    ret = do_write(s->fd, &hdr, sizeof(hdr));
> -    if (ret) {
> +    ret = qemu_co_send(s->fd, &hdr, sizeof(hdr));
> +    if (ret < 0) {
>          error_report("failed to send a req, %s", strerror(errno));
>          return -EIO;
>      }
>  
>      if (wlen) {
> -        ret = do_writev(s->fd, iov, wlen, aio_req->iov_offset);
> -        if (ret) {
> +        ret = qemu_co_sendv(s->fd, iov, wlen, aio_req->iov_offset);
> +        if (ret < 0) {
>              error_report("failed to send a data, %s", strerror(errno));
>              return -EIO;
>          }
> diff --git a/cutils.c b/cutils.c
> index c91f887..9fe8070 100644
> --- a/cutils.c
> +++ b/cutils.c
> @@ -25,6 +25,8 @@
>  #include "host-utils.h"
>  #include <math.h>
>  
> +#include "qemu_socket.h"
> +
>  void pstrcpy(char *buf, int buf_size, const char *str)
>  {
>      int c;
> @@ -415,3 +419,107 @@ int64_t strtosz(const char *nptr, char **end)
>  {
>      return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB);
>  }
> +
> +/*
> + * Send/recv data with iovec buffers
> + *
> + * This function send/recv data from/to the iovec buffer directly.
> + * The first `offset' bytes in the iovec buffer are skipped and next
> + * `len' bytes are used.
> + *
> + * For example,
> + *
> + *   do_send_recv(sockfd, iov, len, offset, 1);
> + *
> + * is equals to
> + *
> + *   char *buf = malloc(size);
> + *   iov_to_buf(iov, iovcnt, buf, offset, size);
> + *   send(sockfd, buf, size, 0);
> + *   free(buf);
> + */
> +static int do_sendv_recvv(int sockfd, struct iovec *iov, int len, int offset,
> +                          int do_sendv)
> +{
> +    int ret, diff, iovlen;
> +    struct iovec *iov_start;
> +
> +    iov_start = iov;
> +    iovlen = 1;
> +    len += offset;
> +
> +    while (iov->iov_len < len) {
> +        len -= iov->iov_len;
> +
> +        iov++;
> +        iovlen++;
> +    }
> +
> +    diff = iov->iov_len - len;
> +    iov->iov_len -= diff;
> +
> +    while (iov->iov_len <= offset) {
> +        offset -= iov->iov_len;
> +
> +        iov++;
> +        iovlen--;
> +    }

I think this should be

    while (iov_start->iov_len <= offset) {
        offset -= iov_start->iov_len;

        iov_start++;
        iovlen--;
    }

> +
> +    iov_start->iov_base = (char *) iov->iov_base + offset;

This line should be
    iov_start->iov_base = (char *) iov_start->iov_base + offset;

> +    iov_start->iov_len -= offset;
> +
> +    {
> +#ifdef CONFIG_IOVEC
> +        struct msghdr msg;
> +        memset(&msg, 0, sizeof(msg));
> +        msg.msg_iov = iov_start;
> +        msg.msg_iovlen = iovlen;
> +
> +        do {
> +            if (do_sendv) {
> +                ret = sendmsg(sockfd, &msg, 0);
> +            } else {
> +                ret = recvmsg(sockfd, &msg, 0);
> +            }
> +        } while (ret == -1 && errno == EINTR);
> +#else
> +        struct iovec *p = iov_start;
> +        ret = 0;
> +        while (iovlen > 0) {
> +            int rc;
> +            if (do_sendv) {
> +                rc = send(sockfd, p->iov_base, p->iov_len, 0);
> +            } else {
> +                rc = qemu_recv(sockfd, p->iov_base, p->iov_len, 0);
> +            }
> +            if (rc == -1) {
> +                if (errno == EINTR) {
> +                    continue;
> +                }
> +                if (ret == 0) {
> +                    ret = -1;
> +                }
> +                break;
> +            }
> +            iovlen--, p++;
> +            ret += rc;
> +        }
> +#endif
> +    }
> +
> +    /* Undo the changes above */
> +    iov_start->iov_base = (char *) iov->iov_base - offset;

Should be
    iov_start->iov_base = (char *) iov_start->iov_base - offset;


Thanks,

Kazutaka

^ permalink raw reply	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH v2 09/12] sheepdog: move coroutine send/recv function to generic code
  2011-09-09  4:53   ` MORITA Kazutaka
@ 2011-09-09  8:11     ` Paolo Bonzini
  2011-09-13  0:28       ` MORITA Kazutaka
  2011-09-13 14:14       ` Kevin Wolf
  0 siblings, 2 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-09  8:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: MORITA Kazutaka

Outside coroutines, avoid busy waiting on EAGAIN by temporarily
making the socket blocking.

The API of qemu_recvv/qemu_sendv is slightly different from
do_readv/do_writev because they do not handle coroutines.  It
returns the number of bytes written before encountering an
EAGAIN.  The specificity of yielding on EAGAIN is entirely in
qemu-coroutine.c.

Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	Thanks for the review.  I checked with qemu-io that all of

		readv -v 0 524288 (x8)
                readv -v 0 262144 (x16)
                readv -v 0 1024 (x4096)
                readv -v 0 1536 (x2730) 1024
                readv -v 0 1024 512 (x2730) 1024

	work and produce the same output, while previously they would fail.
	Looks like it's hard to trigger the code just with qemu.

 block/sheepdog.c |  225 ++++++------------------------------------------------
 cutils.c         |  103 +++++++++++++++++++++++++
 qemu-common.h    |    3 +
 qemu-coroutine.c |   70 +++++++++++++++++
 qemu-coroutine.h |   26 ++++++
 5 files changed, 225 insertions(+), 202 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index af696a5..94e62a3 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -443,129 +443,6 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
     return acb;
 }
 
-#ifdef _WIN32
-
-struct msghdr {
-    struct iovec *msg_iov;
-    size_t        msg_iovlen;
-};
-
-static ssize_t sendmsg(int s, const struct msghdr *msg, int flags)
-{
-    size_t size = 0;
-    char *buf, *p;
-    int i, ret;
-
-    /* count the msg size */
-    for (i = 0; i < msg->msg_iovlen; i++) {
-        size += msg->msg_iov[i].iov_len;
-    }
-    buf = g_malloc(size);
-
-    p = buf;
-    for (i = 0; i < msg->msg_iovlen; i++) {
-        memcpy(p, msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len);
-        p += msg->msg_iov[i].iov_len;
-    }
-
-    ret = send(s, buf, size, flags);
-
-    g_free(buf);
-    return ret;
-}
-
-static ssize_t recvmsg(int s, struct msghdr *msg, int flags)
-{
-    size_t size = 0;
-    char *buf, *p;
-    int i, ret;
-
-    /* count the msg size */
-    for (i = 0; i < msg->msg_iovlen; i++) {
-        size += msg->msg_iov[i].iov_len;
-    }
-    buf = g_malloc(size);
-
-    ret = qemu_recv(s, buf, size, flags);
-    if (ret < 0) {
-        goto out;
-    }
-
-    p = buf;
-    for (i = 0; i < msg->msg_iovlen; i++) {
-        memcpy(msg->msg_iov[i].iov_base, p, msg->msg_iov[i].iov_len);
-        p += msg->msg_iov[i].iov_len;
-    }
-out:
-    g_free(buf);
-    return ret;
-}
-
-#endif
-
-/*
- * Send/recv data with iovec buffers
- *
- * This function send/recv data from/to the iovec buffer directly.
- * The first `offset' bytes in the iovec buffer are skipped and next
- * `len' bytes are used.
- *
- * For example,
- *
- *   do_send_recv(sockfd, iov, len, offset, 1);
- *
- * is equals to
- *
- *   char *buf = malloc(size);
- *   iov_to_buf(iov, iovcnt, buf, offset, size);
- *   send(sockfd, buf, size, 0);
- *   free(buf);
- */
-static int do_send_recv(int sockfd, struct iovec *iov, int len, int offset,
-                        int write)
-{
-    struct msghdr msg;
-    int ret, diff;
-
-    memset(&msg, 0, sizeof(msg));
-    msg.msg_iov = iov;
-    msg.msg_iovlen = 1;
-
-    len += offset;
-
-    while (iov->iov_len < len) {
-        len -= iov->iov_len;
-
-        iov++;
-        msg.msg_iovlen++;
-    }
-
-    diff = iov->iov_len - len;
-    iov->iov_len -= diff;
-
-    while (msg.msg_iov->iov_len <= offset) {
-        offset -= msg.msg_iov->iov_len;
-
-        msg.msg_iov++;
-        msg.msg_iovlen--;
-    }
-
-    msg.msg_iov->iov_base = (char *) msg.msg_iov->iov_base + offset;
-    msg.msg_iov->iov_len -= offset;
-
-    if (write) {
-        ret = sendmsg(sockfd, &msg, 0);
-    } else {
-        ret = recvmsg(sockfd, &msg, 0);
-    }
-
-    msg.msg_iov->iov_base = (char *) msg.msg_iov->iov_base - offset;
-    msg.msg_iov->iov_len += offset;
-
-    iov->iov_len += diff;
-    return ret;
-}
-
 static int connect_to_sdog(const char *addr, const char *port)
 {
     char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
@@ -618,65 +495,6 @@ success:
     return fd;
 }
 
-static int do_readv_writev(int sockfd, struct iovec *iov, int len,
-                           int iov_offset, int write)
-{
-    int ret;
-again:
-    ret = do_send_recv(sockfd, iov, len, iov_offset, write);
-    if (ret < 0) {
-        if (errno == EINTR) {
-            goto again;
-        }
-        if (errno == EAGAIN) {
-            if (qemu_in_coroutine()) {
-                qemu_coroutine_yield();
-            }
-            goto again;
-        }
-        error_report("failed to recv a rsp, %s", strerror(errno));
-        return 1;
-    }
-
-    iov_offset += ret;
-    len -= ret;
-    if (len) {
-        goto again;
-    }
-
-    return 0;
-}
-
-static int do_readv(int sockfd, struct iovec *iov, int len, int iov_offset)
-{
-    return do_readv_writev(sockfd, iov, len, iov_offset, 0);
-}
-
-static int do_writev(int sockfd, struct iovec *iov, int len, int iov_offset)
-{
-    return do_readv_writev(sockfd, iov, len, iov_offset, 1);
-}
-
-static int do_read_write(int sockfd, void *buf, int len, int write)
-{
-    struct iovec iov;
-
-    iov.iov_base = buf;
-    iov.iov_len = len;
-
-    return do_readv_writev(sockfd, &iov, len, 0, write);
-}
-
-static int do_read(int sockfd, void *buf, int len)
-{
-    return do_read_write(sockfd, buf, len, 0);
-}
-
-static int do_write(int sockfd, void *buf, int len)
-{
-    return do_read_write(sockfd, buf, len, 1);
-}
-
 static int send_req(int sockfd, SheepdogReq *hdr, void *data,
                     unsigned int *wlen)
 {
@@ -691,10 +509,9 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data,
         iov[1].iov_len = *wlen;
     }
 
-    ret = do_writev(sockfd, iov, sizeof(*hdr) + *wlen, 0);
-    if (ret) {
+    ret = qemu_sendv(sockfd, iov, sizeof(*hdr) + *wlen, 0);
+    if (ret < 0) {
         error_report("failed to send a req, %s", strerror(errno));
-        ret = -1;
     }
 
     return ret;
@@ -704,17 +521,19 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
                   unsigned int *wlen, unsigned int *rlen)
 {
     int ret;
+    struct iovec iov;
 
+    socket_set_block(sockfd);
     ret = send_req(sockfd, hdr, data, wlen);
-    if (ret) {
-        ret = -1;
+    if (ret < 0) {
         goto out;
     }
 
-    ret = do_read(sockfd, hdr, sizeof(*hdr));
-    if (ret) {
+    iov.iov_base = hdr;
+    iov.iov_len = sizeof(*hdr);
+    ret = qemu_recvv(sockfd, &iov, sizeof(*hdr), 0);
+    if (ret < 0) {
         error_report("failed to get a rsp, %s", strerror(errno));
-        ret = -1;
         goto out;
     }
 
@@ -723,15 +542,17 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
     }
 
     if (*rlen) {
-        ret = do_read(sockfd, data, *rlen);
-        if (ret) {
+        iov.iov_base = data;
+        iov.iov_len = *rlen;
+        ret = qemu_recvv(sockfd, &iov, *rlen, 0);
+        if (ret < 0) {
             error_report("failed to get the data, %s", strerror(errno));
-            ret = -1;
             goto out;
         }
     }
     ret = 0;
 out:
+    socket_set_nonblock(sockfd);
     return ret;
 }
 
@@ -793,8 +614,8 @@ static void coroutine_fn aio_read_response(void *opaque)
     }
 
     /* read a header */
-    ret = do_read(fd, &rsp, sizeof(rsp));
-    if (ret) {
+    ret = qemu_co_recv(fd, &rsp, sizeof(rsp));
+    if (ret < 0) {
         error_report("failed to get the header, %s", strerror(errno));
         goto out;
     }
@@ -839,9 +660,9 @@ static void coroutine_fn aio_read_response(void *opaque)
         }
         break;
     case AIOCB_READ_UDATA:
-        ret = do_readv(fd, acb->qiov->iov, rsp.data_length,
-                       aio_req->iov_offset);
-        if (ret) {
+        ret = qemu_co_recvv(fd, acb->qiov->iov, rsp.data_length,
+                            aio_req->iov_offset);
+        if (ret < 0) {
             error_report("failed to get the data, %s", strerror(errno));
             goto out;
         }
@@ -1114,15 +935,15 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
     set_cork(s->fd, 1);
 
     /* send a header */
-    ret = do_write(s->fd, &hdr, sizeof(hdr));
-    if (ret) {
+    ret = qemu_co_send(s->fd, &hdr, sizeof(hdr));
+    if (ret < 0) {
         error_report("failed to send a req, %s", strerror(errno));
         return -EIO;
     }
 
     if (wlen) {
-        ret = do_writev(s->fd, iov, wlen, aio_req->iov_offset);
-        if (ret) {
+        ret = qemu_co_sendv(s->fd, iov, wlen, aio_req->iov_offset);
+        if (ret < 0) {
             error_report("failed to send a data, %s", strerror(errno));
             return -EIO;
         }
diff --git a/cutils.c b/cutils.c
index c91f887..229794e 100644
--- a/cutils.c
+++ b/cutils.c
@@ -25,6 +25,8 @@
 #include "host-utils.h"
 #include <math.h>
 
+#include "qemu_socket.h"
+
 void pstrcpy(char *buf, int buf_size, const char *str)
 {
     int c;
@@ -415,3 +417,108 @@ int64_t strtosz(const char *nptr, char **end)
 {
     return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB);
 }
+
+/*
+ * Send/recv data with iovec buffers
+ *
+ * This function send/recv data from/to the iovec buffer directly.
+ * The first `offset' bytes in the iovec buffer are skipped and next
+ * `len' bytes are used.
+ *
+ * For example,
+ *
+ *   do_sendv_recvv(sockfd, iov, len, offset, 1);
+ *
+ * is equal to
+ *
+ *   char *buf = malloc(size);
+ *   iov_to_buf(iov, iovcnt, buf, offset, size);
+ *   send(sockfd, buf, size, 0);
+ *   free(buf);
+ */
+static int do_sendv_recvv(int sockfd, struct iovec *iov, int len, int offset,
+                          int do_sendv)
+{
+    int ret, diff, iovlen;
+    struct iovec *last_iov;
+
+    /* last_iov is inclusive, so count from one.  */
+    iovlen = 1;
+    last_iov = iov;
+    len += offset;
+
+    while (last_iov->iov_len < len) {
+        len -= last_iov->iov_len;
+
+        last_iov++;
+        iovlen++;
+    }
+
+    diff = last_iov->iov_len - len;
+    last_iov->iov_len -= diff;
+
+    while (iov->iov_len <= offset) {
+        offset -= iov->iov_len;
+
+        iov++;
+        iovlen--;
+    }
+
+    iov->iov_base = (char *) iov->iov_base + offset;
+    iov->iov_len -= offset;
+
+    {
+#ifdef CONFIG_IOVEC
+        struct msghdr msg;
+        memset(&msg, 0, sizeof(msg));
+        msg.msg_iov = iov;
+        msg.msg_iovlen = iovlen;
+
+        do {
+            if (do_sendv) {
+                ret = sendmsg(sockfd, &msg, 0);
+            } else {
+                ret = recvmsg(sockfd, &msg, 0);
+            }
+        } while (ret == -1 && errno == EINTR);
+#else
+        struct iovec *p = iov;
+        ret = 0;
+        while (iovlen > 0) {
+            int rc;
+            if (do_sendv) {
+                rc = send(sockfd, p->iov_base, p->iov_len, 0);
+            } else {
+                rc = qemu_recv(sockfd, p->iov_base, p->iov_len, 0);
+            }
+            if (rc == -1) {
+                if (errno == EINTR) {
+                    continue;
+                }
+                if (ret == 0) {
+                    ret = -1;
+                }
+                break;
+            }
+            iovlen--, p++;
+            ret += rc;
+        }
+#endif
+    }
+
+    /* Undo the changes above */
+    iov->iov_base = (char *) iov->iov_base - offset;
+    iov->iov_len += offset;
+    last_iov->iov_len += diff;
+    return ret;
+}
+
+int qemu_recvv(int sockfd, struct iovec *iov, int len, int iov_offset)
+{
+    return do_sendv_recvv(sockfd, iov, len, iov_offset, 0);
+}
+
+int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset)
+{
+    return do_sendv_recvv(sockfd, iov, len, iov_offset, 1);
+}
diff --git a/qemu-common.h b/qemu-common.h
index 404c421..fc921cc 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -203,6 +203,9 @@ int qemu_pipe(int pipefd[2]);
 #define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags)
 #endif
 
+int qemu_recvv(int sockfd, struct iovec *iov, int len, int iov_offset);
+int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset);
+
 /* Error handling.  */
 
 void QEMU_NORETURN hw_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
diff --git a/qemu-coroutine.c b/qemu-coroutine.c
index 600be26..f5abbb9 100644
--- a/qemu-coroutine.c
+++ b/qemu-coroutine.c
@@ -73,3 +73,69 @@ void coroutine_fn qemu_coroutine_yield(void)
     self->caller = NULL;
     coroutine_swap(self, to);
 }
+
+int coroutine_fn qemu_co_recvv(int sockfd, struct iovec *iov,
+                               int len, int iov_offset)
+{
+    int total = 0;
+    int ret;
+    while (len) {
+        ret = qemu_recvv(sockfd, iov, len, iov_offset + total);
+        if (ret < 0) {
+            if (errno == EAGAIN) {
+                qemu_coroutine_yield();
+                continue;
+            }
+            if (total == 0) {
+                total = -1;
+            }
+            break;
+        }
+        total += ret, len -= ret;
+    }
+
+    return total;
+}
+
+int coroutine_fn qemu_co_sendv(int sockfd, struct iovec *iov,
+                               int len, int iov_offset)
+{
+    int total = 0;
+    int ret;
+    while (len) {
+        ret = qemu_sendv(sockfd, iov, len, iov_offset + total);
+        if (ret < 0) {
+            if (errno == EAGAIN) {
+                qemu_coroutine_yield();
+                continue;
+            }
+            if (total == 0) {
+                total = -1;
+            }
+            break;
+        }
+        total += ret, len -= ret;
+    }
+
+    return total;
+}
+
+int coroutine_fn qemu_co_recv(int sockfd, void *buf, int len)
+{
+    struct iovec iov;
+
+    iov.iov_base = buf;
+    iov.iov_len = len;
+
+    return qemu_co_recvv(sockfd, &iov, len, 0);
+}
+
+int coroutine_fn qemu_co_send(int sockfd, void *buf, int len)
+{
+    struct iovec iov;
+
+    iov.iov_base = buf;
+    iov.iov_len = len;
+
+    return qemu_co_sendv(sockfd, &iov, len, 0);
+}
diff --git a/qemu-coroutine.h b/qemu-coroutine.h
index b8fc4f4..a1a41f6 100644
--- a/qemu-coroutine.h
+++ b/qemu-coroutine.h
@@ -188,4 +188,30 @@ void qemu_co_rwlock_wrlock(CoRwlock *lock);
  */
 void qemu_co_rwlock_unlock(CoRwlock *lock);
 
+/**
+ * Sends an iovec (or optionally a part of it) down a socket, yielding
+ * when the socket is full.
+ */
+int coroutine_fn qemu_co_sendv(int sockfd, struct iovec *iov,
+                               int len, int iov_offset);
+
+/**
+ * Receives data into an iovec (or optionally into a part of it) from
+ * a socket, yielding when there is no data in the socket.
+ */
+int coroutine_fn qemu_co_recvv(int sockfd, struct iovec *iov,
+                               int len, int iov_offset);
+
+
+/**
+ * Sends a buffer down a socket, yielding when the socket is full.
+ */
+int coroutine_fn qemu_co_send(int sockfd, void *buf, int len);
+
+/**
+ * Receives data into a buffer from a socket, yielding when there
+ * is no data in the socket.
+ */
+int coroutine_fn qemu_co_recv(int sockfd, void *buf, int len);
+
 #endif /* QEMU_COROUTINE_H */
-- 
1.7.6

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 00/12] nbd improvements
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
                   ` (11 preceding siblings ...)
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 12/12] nbd: split requests Paolo Bonzini
@ 2011-09-09  9:00 ` Kevin Wolf
  2011-09-09 10:29   ` Paolo Bonzini
  2011-09-14  9:50 ` Kevin Wolf
  13 siblings, 1 reply; 42+ messages in thread
From: Kevin Wolf @ 2011-09-09  9:00 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, Nicholas Thomas

Am 08.09.2011 17:24, schrieb Paolo Bonzini:
> I find nbd quite useful to test migration, but it is limited:
> it can only do synchronous operation, it is not safe because it
> does not support flush, and it has no discard either.  qemu-nbd
> is also limited to 1MB requests, and the nbd block driver does
> not take this into account.
> 
> Luckily, flush/FUA support is being worked out by upstream,
> and discard can also be added with the same framework (patches
> 1 to 6).
> 
> Asynchronous support is also very similar to what sheepdog is
> already doing (patches 7 to 12).
> 
> Paolo Bonzini (12):
>   nbd: support feature negotiation
>   nbd: sync API definitions with upstream
>   nbd: support NBD_SET_FLAGS ioctl
>   nbd: add support for NBD_CMD_FLUSH
>   nbd: add support for NBD_CMD_FLAG_FUA
>   nbd: support NBD_CMD_TRIM in the server
>   sheepdog: add coroutine_fn markers
>   add socket_set_block
>   sheepdog: move coroutine send/recv function to generic code
>   block: add bdrv_co_flush support
>   nbd: switch to asynchronous operation
>   nbd: split requests
> 
>  block.c          |   53 ++++++++++---
>  block/nbd.c      |  225 ++++++++++++++++++++++++++++++++++++++++++++--------
>  block/sheepdog.c |  235 +++++++-----------------------------------------------
>  block_int.h      |    1 +
>  cutils.c         |  108 +++++++++++++++++++++++++
>  nbd.c            |   80 +++++++++++++++++--
>  nbd.h            |   20 ++++-
>  oslib-posix.c    |    7 ++
>  oslib-win32.c    |    6 ++
>  qemu-common.h    |    3 +
>  qemu-coroutine.c |   71 ++++++++++++++++
>  qemu-coroutine.h |   26 ++++++
>  qemu-nbd.c       |   13 ++--
>  qemu_socket.h    |    1 +
>  14 files changed, 580 insertions(+), 269 deletions(-)

There is another patch enabling AIO for NBD on the list [1], by
Nicholas Thomas (CCed), that lacked review so far. Can you guys please
review each others approach and then converge on a solution? I guess
Paolo's patches 1-7 can be applied in any case, probably causing minor
conflicts, but for the rest we need to decide which one to pick.

Kevin

[1] http://www.mail-archive.com/qemu-devel@nongnu.org/msg74711.html

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 00/12] nbd improvements
  2011-09-09  9:00 ` [Qemu-devel] [PATCH 00/12] nbd improvements Kevin Wolf
@ 2011-09-09 10:29   ` Paolo Bonzini
  2011-09-09 10:42     ` Kevin Wolf
  2011-09-09 10:50     ` Nicholas Thomas
  0 siblings, 2 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-09 10:29 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-devel, Nicholas Thomas

On 09/09/2011 11:00 AM, Kevin Wolf wrote:
> There is another patch enabling AIO for NBD on the list [1], by
> Nicholas Thomas (CCed), that has lacked review so far. Can you guys please
> review each other's approach and then converge on a solution? I guess
> Paolo's patches 1-7 can be applied in any case, probably causing minor
> conflicts, but for the rest we need to decide which one to pick.

Stefan also pointed me to Nicholas's patches yesterday.  I would go with 
mine, if only because his work predates coroutines (at least the older 
versions) and is much more complex.

On the other hand, Nicholas's work includes timeout and reconnect.  I'm 
not sure how complicated it is to include it in my series, but probably 
not much.  With coroutines, preserving the list of outstanding I/O 
requests is done implicitly by the CoMutex, so you basically have to 
check errno for ECONNRESET and similar errors, reconnect, and retry 
issuing the current request only.

Timeout can be done with a QEMUTimer that shuts down the socket 
(shutdown(2) I mean).  This triggers an EPIPE when writing, or a 
zero-sized read when reading.  The timeout can be set every time the 
coroutine is (re)entered, and reset before exiting nbd_co_readv/writev.

What do you think?

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 00/12] nbd improvements
  2011-09-09 10:29   ` Paolo Bonzini
@ 2011-09-09 10:42     ` Kevin Wolf
  2011-09-09 10:50     ` Nicholas Thomas
  1 sibling, 0 replies; 42+ messages in thread
From: Kevin Wolf @ 2011-09-09 10:42 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, Nicholas Thomas

Am 09.09.2011 12:29, schrieb Paolo Bonzini:
> On 09/09/2011 11:00 AM, Kevin Wolf wrote:
>> There is another patch enabling AIO for NBD on the list [1], by
>> Nicholas Thomas (CCed), that has lacked review so far. Can you guys please
>> review each other's approach and then converge on a solution? I guess
>> Paolo's patches 1-7 can be applied in any case, probably causing minor
>> conflicts, but for the rest we need to decide which one to pick.
> 
> Stefan also pointed me to Nicholas's patches yesterday.  I would go with 
> mine, if only because his work predates coroutines (at least the older 
> versions) and are much more complex.
> 
> On the other hand, Nicholas's work includes timeout and reconnect.  I'm 
> not sure how complicated it is to include it in my series, but probably 
> not much.  With coroutines, preserving the list of outstanding I/O 
> requests is done implicitly by the CoMutex, so you basically have to 
> check errno for ECONNRESET and similar errors, reconnect, and retry 
> issuing the current request only.
> 
> Timeout can be done with a QEMUTimer that shuts down the socket 
> (shutdown(2) I mean).  This triggers an EPIPE when writing, or a 
> zero-sized read when reading.  The timeout can be set every time the 
> coroutine is (re)entered, and reset before exiting nbd_co_readv/writev.
> 
> What do you think?

I haven't really had a look at your patches yet (I hope to do so later
today), but from the patch descriptions I would think that indeed your
patches could be the better base for a converged series.

What I would like you and Nick to do is to see what is missing from your
series compared to his one (you describe a couple of things, but let's
see if Nick knows anything else) and to agree on the next steps. I think
that a possible way is to merge your series and do the other
improvements on top, but as I said I haven't really looked into the
details yet.

Kevin

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 00/12] nbd improvements
  2011-09-09 10:29   ` Paolo Bonzini
  2011-09-09 10:42     ` Kevin Wolf
@ 2011-09-09 10:50     ` Nicholas Thomas
  2011-09-09 11:00       ` Paolo Bonzini
  2011-09-09 11:04       ` Kevin Wolf
  1 sibling, 2 replies; 42+ messages in thread
From: Nicholas Thomas @ 2011-09-09 10:50 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Kevin Wolf, qemu-devel

On Fri, 2011-09-09 at 12:29 +0200, Paolo Bonzini wrote:
> On 09/09/2011 11:00 AM, Kevin Wolf wrote:
> > There is another patch enabling AIO for NBD on the list [1], by
> > Nicholas Thomas (CCed), that has lacked review so far. Can you guys please
> > review each other's approach and then converge on a solution? I guess
> > Paolo's patches 1-7 can be applied in any case, probably causing minor
> > conflicts, but for the rest we need to decide which one to pick.
> 
> Stefan also pointed me to Nicholas's patches yesterday.  I would go with 
> mine, if only because his work predates coroutines (at least the older 
> versions) and are much more complex.

I'd concur here. I wrote the AIO portion of my patches merely to get the
code into a state where I could add timeout and reconnect logic - I'm
pretty sure the code I wrote is *correct* (we're using it in production,
after all), but I'm by no means invested in it. Your version includes
TRIM and flush support as well, which is nice.

> On the other hand, Nicholas's work includes timeout and reconnect.  I'm 
> not sure how complicated it is to include it in my series, but probably 
> not much.  With coroutines, preserving the list of outstanding I/O 
> requests is done implicitly by the CoMutex, so you basically have to 
> check errno for ECONNRESET and similar errors, reconnect, and retry 
> issuing the current request only.

Definitely a simpler approach than my version. Although, does your
version preserve write ordering? I was quite careful to ensure that
write requests would always be sent in the order they were presented;
although I don't know if qemu proper depends on that behaviour or not.

> Timeout can be done with a QEMUTimer that shuts down the socket 
> (shutdown(2) I mean).  This triggers an EPIPE when writing, or a 
> zero-sized read when reading.  The timeout can be set every time the 
> coroutine is (re)entered, and reset before exiting nbd_co_readv/writev.

For reconnect, I did consider using QEMUTimer, but when I tried it I ran
into linking problems with qemu-io. As far as I can tell, resolving that
is a significant code reorganisation - QEMUTimer pulls in lots of code
that isn't linked in at the moment (VM clocks, etc). I'm not sure it's
worth tackling that to avoid using timer_create(2).

The timeout code I have at the moment is something of a bodge and
replacing it with an actual timer (of either kind) would be a massive
improvement, obviously.

/Nick

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 00/12] nbd improvements
  2011-09-09 10:50     ` Nicholas Thomas
@ 2011-09-09 11:00       ` Paolo Bonzini
  2011-09-09 11:04       ` Kevin Wolf
  1 sibling, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-09 11:00 UTC (permalink / raw)
  To: Nicholas Thomas; +Cc: Kevin Wolf, qemu-devel

On 09/09/2011 12:50 PM, Nicholas Thomas wrote:
>> >  On the other hand, Nicholas's work includes timeout and reconnect.  I'm
>> >  not sure how complicated it is to include it in my series, but probably
>> >  not much.  With coroutines, preserving the list of outstanding I/O
>> >  requests is done implicitly by the CoMutex, so you basically have to
>> >  check errno for ECONNRESET and similar errors, reconnect, and retry
>> >  issuing the current request only.
> Definitely a simpler approach than my version. Although, does your
> version preserve write ordering? I was quite careful to ensure that
> write requests would always be sent in the order they were presented;
> although I don't know if qemu proper depends on that behaviour or not.

Yes, the current operation does not drop the CoMutex until the error 
goes away.  Ordering is preserved because in turn no request can start 
until the first leaves the coroutine (with the CoMutex locked).

Anyway I think the guests do not depend on it, there is no ordering 
guarantee in the AIO thread pool.  If any of the more advanced file 
formats do, that would be a bug.

>> >  Timeout can be done with a QEMUTimer that shuts down the socket
>> >  (shutdown(2) I mean).  This triggers an EPIPE when writing, or a
>> >  zero-sized read when reading.  The timeout can be set every time the
>> >  coroutine is (re)entered, and reset before exiting nbd_co_readv/writev.
> for reconnect, I did consider using QEMUTimer, but when I tried it I ran
> into linking problems with qemu-io.   As far as I can tell, resolving that
> is a significant code reorganisation - QEMUTimer pulls in lots of code
> that isn't linked in at the moment (VM clocks, etc). I'm not sure it's
> worth tackling that to avoid using timer_create(2).

I agree.  I think it's worth it because it would help Anthony's work 
with unit testing too; but it's also a significant amount of work.  But 
in this sense, keeping each feature in a separate patch helps a lot.  If 
something is done in a hacky way, it's much easier to redo it later if 
"git show" gives a good overview of how it was done.

I saw earlier versions of your patch had problems with the upstream nbd 
server.  It works for me, but you'd be welcome to try out my series.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 00/12] nbd improvements
  2011-09-09 10:50     ` Nicholas Thomas
  2011-09-09 11:00       ` Paolo Bonzini
@ 2011-09-09 11:04       ` Kevin Wolf
  2011-09-09 14:51         ` Nicholas Thomas
  1 sibling, 1 reply; 42+ messages in thread
From: Kevin Wolf @ 2011-09-09 11:04 UTC (permalink / raw)
  To: Nicholas Thomas; +Cc: Paolo Bonzini, qemu-devel

Am 09.09.2011 12:50, schrieb Nicholas Thomas:
> On Fri, 2011-09-09 at 12:29 +0200, Paolo Bonzini wrote:
>> On 09/09/2011 11:00 AM, Kevin Wolf wrote:
>>> There is another patch enabling AIO for NBD on the list [1], by
>>> Nicholas Thomas (CCed), that has lacked review so far. Can you guys please
>>> review each other's approach and then converge on a solution? I guess
>>> Paolo's patches 1-7 can be applied in any case, probably causing minor
>>> conflicts, but for the rest we need to decide which one to pick.
>>
>> Stefan also pointed me to Nicholas's patches yesterday.  I would go with 
>> mine, if only because his work predates coroutines (at least the older 
>> versions) and are much more complex.
> 
> I'd concur here. I wrote the AIO portion of my patches merely to get the
> code into a state where I could add timeout and reconnect logic - I'm
> pretty sure the code I wrote is *correct* (we're using it in production,
> after all), but I'm by no means invested in it. Your version includes
> TRIM and flush support as well, which is nice.

Good to see agreement here. Do you think that Paolo's patches need to be
changed or can we do everything else on top?

>> On the other hand, Nicholas's work includes timeout and reconnect.  I'm 
>> not sure how complicated it is to include it in my series, but probably 
>> not much.  With coroutines, preserving the list of outstanding I/O 
>> requests is done implicitly by the CoMutex, so you basically have to 
>> check errno for ECONNRESET and similar errors, reconnect, and retry 
>> issuing the current request only.
> 
> Definitely a simpler approach than my version. Although, does your
> version preserve write ordering? I was quite careful to ensure that
> write requests would always be sent in the order they were presented;
> although I don't know if qemu proper depends on that behaviour or not.

This is stricter semantics than is required. Write order is only
guaranteed if request B is issued after request A has completed. If A
has been submitted, but hasn't completed yet, and you submit B, then the
order is undefined.

>> Timeout can be done with a QEMUTimer that shuts down the socket 
>> (shutdown(2) I mean).  This triggers an EPIPE when writing, or a 
>> zero-sized read when reading.  The timeout can be set every time the 
>> coroutine is (re)entered, and reset before exiting nbd_co_readv/writev.
> 
> for reconnect, I did consider using QEMUTimer, but when I tried it I ran
> into linking problems with qemu-io. As far as I can tell, resolving that
> is a significant code reorganisation - QEMUTimer pulls in lots of code
> that isn't linked in at the moment (VM clocks, etc). I'm not sure it's
> worth tackling that to avoid using timer_create(2). 
> 
> The timeout code I have at the moment is something of a bodge and
> replacing it with an actual timer (of either kind) would be a massive
> improvement, obviously.

We do have some timer stubs in qemu-tool.c since May. Of course, they
are just stubs and never really trigger the timer, but I assume that the
timeouts are really meant for qemu proper anyway, rather than qemu-img
or qemu-io.

Kevin

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 00/12] nbd improvements
  2011-09-09 11:04       ` Kevin Wolf
@ 2011-09-09 14:51         ` Nicholas Thomas
  0 siblings, 0 replies; 42+ messages in thread
From: Nicholas Thomas @ 2011-09-09 14:51 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: Paolo Bonzini, qemu-devel

On 09/09/11 12:04, Kevin Wolf wrote:

> Good to see agreement here. Do you think that Paolo's patches need to be
> changed or can we do everything else on top?

A few things have come up on a third read, actually. I'll respond in due
course to the appropriate patch.

> We do have some timer stubs in qemu-tool.c since May. Of course, they
> are just stubs and never really trigger the timer, but I assume that the
> timeouts are really meant for qemu proper anyway, rather than qemu-img
> or qemu-io.

I wasn't aware of these - however, using them would make testing
timeout/reconnect quite a bit harder. And surely there are use cases of
qemu-img that would benefit from automatic reconnection?

I can work around the former, and we don't actually use qemu-img for
anything ourselves, so if you think this is the right way to go about
it, I'm happy to rework the timeout/reconnect using QEMUTimer, on top of
Paolo's patches.

/Nick

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 11/12] nbd: switch to asynchronous operation
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 11/12] nbd: switch to asynchronous operation Paolo Bonzini
@ 2011-09-09 14:52   ` Nicholas Thomas
  2011-09-09 15:03     ` Paolo Bonzini
  0 siblings, 1 reply; 42+ messages in thread
From: Nicholas Thomas @ 2011-09-09 14:52 UTC (permalink / raw)
  To: kwolf; +Cc: Paolo Bonzini, qemu-devel

On 08/09/11 16:25, Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block/nbd.c |  167 ++++++++++++++++++++++++++++++++++++++--------------------
>  nbd.c       |    8 +++
>  2 files changed, 117 insertions(+), 58 deletions(-)
> 
> diff --git a/block/nbd.c b/block/nbd.c
> index 964caa8..5a75263 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -52,6 +52,9 @@ typedef struct BDRVNBDState {
>      size_t blocksize;
>      char *export_name; /* An NBD server may export several devices */
>  
> +    CoMutex mutex;
> +    Coroutine *coroutine;
> +
>      /* If it begins with  '/', this is a UNIX domain socket. Otherwise,
>       * it's a string of the form <hostname|ip4|\[ip6\]>:port
>       */
> @@ -104,6 +107,37 @@ out:
>      return err;
>  }
>  
> +static void nbd_coroutine_start(BDRVNBDState *s)
> +{
> +    qemu_co_mutex_lock(&s->mutex);
> +    s->coroutine = qemu_coroutine_self();
> +}
> +
> +static void nbd_coroutine_enter(void *opaque)
> +{
> +    BDRVNBDState *s = opaque;
> +    qemu_coroutine_enter(s->coroutine, NULL);
> +}
> +
> +static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request)
> +{
> +    qemu_aio_set_fd_handler(s->sock, NULL, nbd_coroutine_enter, NULL, NULL, s);
> +    return nbd_send_request(s->sock, request);
> +}
> +
> +static int nbd_co_receive_reply(BDRVNBDState *s, struct nbd_reply *reply)
> +{
> +    qemu_aio_set_fd_handler(s->sock, nbd_coroutine_enter, NULL, NULL, NULL, s);
> +    return nbd_receive_reply(s->sock, reply);
> +}
> +
> +static void nbd_coroutine_end(BDRVNBDState *s)
> +{
> +    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL, s);
> +    s->coroutine = NULL;
> +    qemu_co_mutex_unlock(&s->mutex);
> +}
> +
>  static int nbd_establish_connection(BlockDriverState *bs)
>  {
>      BDRVNBDState *s = bs->opaque;
> @@ -163,6 +197,8 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
>      BDRVNBDState *s = bs->opaque;
>      int result;
>  
> +    qemu_co_mutex_init(&s->mutex);
> +
>      /* Pop the config into our state object. Exit if invalid. */
>      result = nbd_config(s, filename, flags);
>      if (result != 0) {
> @@ -177,8 +213,8 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
>      return result;
>  }
>  
> -static int nbd_read(BlockDriverState *bs, int64_t sector_num,
> -                    uint8_t *buf, int nb_sectors)
> +static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
> +                        int nb_sectors, QEMUIOVector *qiov)
>  {
>      BDRVNBDState *s = bs->opaque;
>      struct nbd_request request;
> @@ -189,30 +225,39 @@ static int nbd_read(BlockDriverState *bs, int64_t sector_num,
>      request.from = sector_num * 512;;
>      request.len = nb_sectors * 512;
>  
> -    if (nbd_send_request(s->sock, &request) == -1)
> -        return -errno;
> -
> -    if (nbd_receive_reply(s->sock, &reply) == -1)
> -        return -errno;
> -
> -    if (reply.error !=0)
> -        return -reply.error;
> -
> -    if (reply.handle != request.handle)
> -        return -EIO;
> +    nbd_coroutine_start(s);
> +    if (nbd_co_send_request(s, &request) == -1) {
> +        reply.error = errno;
> +        goto done;
> +    }
> +    if (nbd_co_receive_reply(s, &reply) == -1) {
> +        reply.error = errno;
> +        goto done;
> +    }
> +    if (reply.error != 0) {
> +        goto done;
> +    }
> +    if (reply.handle != request.handle) {
> +        reply.error = EIO;
> +        goto done;
> +    }
> +    if (qemu_co_recvv(s->sock, qiov->iov, request.len, 0) != request.len) {
> +        reply.error = EIO;
> +    }
>  
> -    if (nbd_wr_sync(s->sock, buf, request.len, 1) != request.len)
> -        return -EIO;
> +done:
> +    nbd_coroutine_end(s);
> +    return -reply.error;
>  
> -    return 0;
>  }

I'm a bit unsure here, actually. So you lock a mutex, send a request,
wait for a response, then unlock the mutex. Surely this code doesn't
allow more than one request to be in flight at a time?

My approach was to write the request to the socket as soon as possible.
IIRC, QEMU can have up to 16 IOs outstanding, so this gave us "some"
speedup, although I don't have formal before/after benchmarks. For
testing, speed isn't too important, but we're using NBD in production to
run VMs on a 10GigE network with SAS and SSD storage, so we're somewhat
interested in performance :).

If this *is* letting > 1 request be on the wire at a time, then
request.handle needs to be unique to the request. I don't think:

    request.handle = (uint64_t)(intptr_t)bs;

is.


> -static int nbd_write(BlockDriverState *bs, int64_t sector_num,
> -                     const uint8_t *buf, int nb_sectors)
> +static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
> +                         int nb_sectors, QEMUIOVector *qiov)
>  {
>      BDRVNBDState *s = bs->opaque;
>      struct nbd_request request;
>      struct nbd_reply reply;
> +    int ret;
>  
>      request.type = NBD_CMD_WRITE;
>      if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
> @@ -223,25 +268,30 @@ static int nbd_write(BlockDriverState *bs, int64_t sector_num,
>      request.from = sector_num * 512;;
>      request.len = nb_sectors * 512;
>  
> -    if (nbd_send_request(s->sock, &request) == -1)
> -        return -errno;
> -
> -    if (nbd_wr_sync(s->sock, (uint8_t*)buf, request.len, 0) != request.len)
> -        return -EIO;
> -
> -    if (nbd_receive_reply(s->sock, &reply) == -1)
> -        return -errno;
> -
> -    if (reply.error !=0)
> -        return -reply.error;
> -
> -    if (reply.handle != request.handle)
> -        return -EIO;
> +    nbd_coroutine_start(s);
> +    if (nbd_co_send_request(s, &request) == -1) {
> +        reply.error = errno;
> +        goto done;
> +    }
> +    ret = qemu_co_sendv(s->sock, qiov->iov, request.len, 0);
> +    if (ret != request.len) {
> +        reply.error = EIO;
> +        goto done;
> +    }
> +    if (nbd_co_receive_reply(s, &reply) == -1) {
> +        reply.error = errno;
> +        goto done;
> +    }
> +    if (reply.handle != request.handle) {
> +        reply.error = EIO;
> +    }
>  
> -    return 0;
> +done:
> +    nbd_coroutine_end(s);
> +    return -reply.error;
>  }
>  
> -static int nbd_flush(BlockDriverState *bs)
> +static int nbd_co_flush(BlockDriverState *bs)
>  {
>      BDRVNBDState *s = bs->opaque;
>      struct nbd_request request;
> @@ -260,19 +310,22 @@ static int nbd_flush(BlockDriverState *bs)
>      request.from = 0;
>      request.len = 0;
>  
> -    if (nbd_send_request(s->sock, &request) == -1)
> -        return -errno;
> -
> -    if (nbd_receive_reply(s->sock, &reply) == -1)
> -        return -errno;
> -
> -    if (reply.error !=0)
> -        return -reply.error;
> -
> -    if (reply.handle != request.handle)
> -        return -EIO;
> +    nbd_coroutine_start(s);
> +    if (nbd_co_send_request(s, &request) == -1) {
> +        reply.error = errno;
> +        goto done;
> +    }
> +    if (nbd_co_receive_reply(s, &reply) == -1) {
> +        reply.error = errno;
> +        goto done;
> +    }
> +    if (reply.error == 0 && reply.handle != request.handle) {
> +        reply.error = EIO;
> +    }
>  
> -    return 0;
> +done:
> +    nbd_coroutine_end(s);
> +    return -reply.error;
>  }
>  
>  static int nbd_discard(BlockDriverState *bs, int64_t sector_num,
> @@ -290,19 +343,17 @@ static int nbd_discard(BlockDriverState *bs, int64_t sector_num,
>      request.from = sector_num * 512;;
>      request.len = nb_sectors * 512;
>  
> -    if (nbd_send_request(s->sock, &request) == -1)
> +    if (nbd_send_request(s->sock, &request) == -1) {
>          return -errno;
> -
> -    if (nbd_receive_reply(s->sock, &reply) == -1)
> +    }
> +    if (nbd_receive_reply(s->sock, &reply) == -1) {
>          return -errno;
> -
> -    if (reply.error !=0)
> -        return -reply.error;
> -
> -    if (reply.handle != request.handle)
> +    }
> +    if (reply.error == 0 && reply.handle != request.handle) {
>          return -EIO;
> +    }
>  
> -    return 0;
> +    return -reply.error;
>  }
>  
>  static void nbd_close(BlockDriverState *bs)
> @@ -325,10 +376,10 @@ static BlockDriver bdrv_nbd = {
>      .format_name	= "nbd",
>      .instance_size	= sizeof(BDRVNBDState),
>      .bdrv_file_open	= nbd_open,
> -    .bdrv_read		= nbd_read,
> -    .bdrv_write		= nbd_write,
> +    .bdrv_co_readv	= nbd_co_readv,
> +    .bdrv_co_writev	= nbd_co_writev,
>      .bdrv_close		= nbd_close,
> -    .bdrv_flush		= nbd_flush,
> +    .bdrv_co_flush	= nbd_co_flush,
>      .bdrv_discard	= nbd_discard,
>      .bdrv_getlength	= nbd_getlength,
>      .protocol_name	= "nbd",
> diff --git a/nbd.c b/nbd.c
> index f089904..2f4c6b3 100644
> --- a/nbd.c
> +++ b/nbd.c
> @@ -80,6 +80,14 @@ size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
>  {
>      size_t offset = 0;
>  
> +    if (qemu_in_coroutine()) {
> +        if (do_read) {
> +            return qemu_co_recv(fd, buffer, size);
> +        } else {
> +            return qemu_co_send(fd, buffer, size);
> +        }
> +    }
> +
>      while (offset < size) {
>          ssize_t len;
>  

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 12/12] nbd: split requests
  2011-09-08 15:25 ` [Qemu-devel] [PATCH 12/12] nbd: split requests Paolo Bonzini
@ 2011-09-09 14:52   ` Nicholas Thomas
  2011-09-09 15:33     ` Paolo Bonzini
  0 siblings, 1 reply; 42+ messages in thread
From: Nicholas Thomas @ 2011-09-09 14:52 UTC (permalink / raw)
  To: kwolf; +Cc: Paolo Bonzini, qemu-devel

On 08/09/11 16:25, Paolo Bonzini wrote:
> qemu-nbd has a limit of slightly less than 1M per request.  Work
> around this in the nbd block driver.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block/nbd.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++------
>  1 files changed, 46 insertions(+), 6 deletions(-)
> 
> diff --git a/block/nbd.c b/block/nbd.c
> index 5a75263..468a517 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -213,8 +213,9 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
>      return result;
>  }
>  
> -static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
> -                        int nb_sectors, QEMUIOVector *qiov)
> +static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
> +                          int nb_sectors, QEMUIOVector *qiov,
> +                          int offset)
>  {
>      BDRVNBDState *s = bs->opaque;
>      struct nbd_request request;
> @@ -241,7 +242,7 @@ static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
>          reply.error = EIO;
>          goto done;
>      }
> -    if (qemu_co_recvv(s->sock, qiov->iov, request.len, 0) != request.len) {
> +    if (qemu_co_recvv(s->sock, qiov->iov, request.len, offset) != request.len) {
>          reply.error = EIO;
>      }
>  
> @@ -251,8 +252,9 @@ done:
>  
>  }
>  
> -static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
> -                         int nb_sectors, QEMUIOVector *qiov)
> +static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
> +                           int nb_sectors, QEMUIOVector *qiov,
> +                           int offset)
>  {
>      BDRVNBDState *s = bs->opaque;
>      struct nbd_request request;
> @@ -273,7 +275,7 @@ static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
>          reply.error = errno;
>          goto done;
>      }
> -    ret = qemu_co_sendv(s->sock, qiov->iov, request.len, 0);
> +    ret = qemu_co_sendv(s->sock, qiov->iov, request.len, offset);
>      if (ret != request.len) {
>          reply.error = EIO;
>          goto done;
> @@ -291,6 +293,44 @@ done:
>      return -reply.error;
>  }
>  
> +/* qemu-nbd has a limit of slightly less than 1M per request.  For safety,
> + * transfer at most 512K per request. */
> +#define NBD_MAX_SECTORS 1024

As far as I'm aware, the limit of 1MiB - header size is common to all
NBD servers. I'm not aware of anything at all that'll fail on a 768K
request but succeed in the exact same circumstances on a 512K request.
Again, this is a performance consideration - each request is relatively
slow, so you don't want them to be unnecessarily small.


> +
> +static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
> +                        int nb_sectors, QEMUIOVector *qiov)
> +{
> +    int offset = 0;
> +    int ret;
> +    while (nb_sectors > NBD_MAX_SECTORS) {
> +        ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +        offset += NBD_MAX_SECTORS * 512;
> +        sector_num += NBD_MAX_SECTORS;
> +        nb_sectors -= NBD_MAX_SECTORS;
> +    }
> +    return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
> +}
> +
> +static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
> +                         int nb_sectors, QEMUIOVector *qiov)
> +{
> +    int offset = 0;
> +    int ret;
> +    while (nb_sectors > NBD_MAX_SECTORS) {
> +        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +        offset += NBD_MAX_SECTORS * 512;
> +        sector_num += NBD_MAX_SECTORS;
> +        nb_sectors -= NBD_MAX_SECTORS;
> +    }
> +    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
> +}
> +
>  static int nbd_co_flush(BlockDriverState *bs)
>  {
>      BDRVNBDState *s = bs->opaque;

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 11/12] nbd: switch to asynchronous operation
  2011-09-09 14:52   ` Nicholas Thomas
@ 2011-09-09 15:03     ` Paolo Bonzini
  2011-09-09 15:34       ` Paolo Bonzini
  0 siblings, 1 reply; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-09 15:03 UTC (permalink / raw)
  To: Nicholas Thomas; +Cc: kwolf, qemu-devel

On 09/09/2011 04:52 PM, Nicholas Thomas wrote:
> I'm a bit unsure here, actually. So you lock a mutex, send a request,
> wait for a response, then unlock the mutex. Surely this code doesn't
> allow more than one request to be in flight at a time?

No, it doesn't.  It shouldn't be hard to do it though. You could have 
two mutexes, one for sending and one for receiving.  You yield after 
sending, and let nbd_coroutine_restart read the reply.  It can then 
reenter that reply's coroutine based on the handle in the reply.  I 
still prefer to do it in a separate patch.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 12/12] nbd: split requests
  2011-09-09 14:52   ` Nicholas Thomas
@ 2011-09-09 15:33     ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-09 15:33 UTC (permalink / raw)
  To: Nicholas Thomas; +Cc: kwolf, qemu-devel

On 09/09/2011 04:52 PM, Nicholas Thomas wrote:
>> >  +/* qemu-nbd has a limit of slightly less than 1M per request.  For safety,
>> >  + * transfer at most 512K per request. */
>> >  +#define NBD_MAX_SECTORS 1024
>
> As far as I'm aware, the limit of 1MiB - header size is common to all
> NBD servers. I'm not aware of anything at all that'll fail on a 768K
> request but succeed in the exact same circumstances on a 512K request.
> Again, this is a performance consideration - each request is relatively
> slow, so you don't want them to be unnecessarily small.

Yes, it should probably be bumped to 1536 or 2040 (to keep requests 
4k-aligned).  I wasn't sure about the limit.  I've never seen requests 
that big anyway.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 11/12] nbd: switch to asynchronous operation
  2011-09-09 15:03     ` Paolo Bonzini
@ 2011-09-09 15:34       ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-09 15:34 UTC (permalink / raw)
  Cc: kwolf, qemu-devel, Nicholas Thomas

On 09/09/2011 05:03 PM, Paolo Bonzini wrote:
>
>> I'm a bit unsure here, actually. So you lock a mutex, send a request,
>> wait for a response, then unlock the mutex. Surely this code doesn't
>> allow more than one request to be in flight at a time?
>
> No, it doesn't.  It shouldn't be hard to do it though. You could have
> two mutexes, one for sending and one for receiving.  You yield after
> sending, and let nbd_coroutine_restart read the reply.  It can then
> reenter that reply's coroutine based on the handle in the reply.  I
> still prefer to do it in a separate patch.

There is a problem with discard requests because they are not coroutine 
based (yet).  But it would be the same even in your AIO implementation, 
unfortunately.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 02/12] nbd: sync API definitions with upstream
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 02/12] nbd: sync API definitions with upstream Paolo Bonzini
@ 2011-09-12 14:15   ` Kevin Wolf
  2011-09-12 15:08     ` Paolo Bonzini
  0 siblings, 1 reply; 42+ messages in thread
From: Kevin Wolf @ 2011-09-12 14:15 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

Am 08.09.2011 17:24, schrieb Paolo Bonzini:
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  nbd.c |    2 ++
>  nbd.h |   11 ++++++++++-
>  2 files changed, 12 insertions(+), 1 deletions(-)

Which upstream? I can't find any NBD version that defines a command/flag
for TRIM.

Kevin

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 02/12] nbd: sync API definitions with upstream
  2011-09-12 14:15   ` Kevin Wolf
@ 2011-09-12 15:08     ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-12 15:08 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-devel

On 09/12/2011 04:15 PM, Kevin Wolf wrote:
>> >  Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
>> >  ---
>> >    nbd.c |    2 ++
>> >    nbd.h |   11 ++++++++++-
>> >    2 files changed, 12 insertions(+), 1 deletions(-)
> Which upstream? I can't find any NBD version that defines a command/flag
> for TRIM.

The upstream userspace maintainer said he has applied the patch, but 
apparently hasn't pushed it out yet.

http://article.gmane.org/gmane.linux.drivers.nbd.general/1084

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v2 09/12] sheepdog: move coroutine send/recv function to generic code
  2011-09-09  8:11     ` [Qemu-devel] [PATCH v2 " Paolo Bonzini
@ 2011-09-13  0:28       ` MORITA Kazutaka
  2011-09-13 14:14       ` Kevin Wolf
  1 sibling, 0 replies; 42+ messages in thread
From: MORITA Kazutaka @ 2011-09-13  0:28 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, MORITA Kazutaka

At Fri,  9 Sep 2011 10:11:38 +0200,
Paolo Bonzini wrote:
> 
> Outside coroutines, avoid busy waiting on EAGAIN by temporarily
> making the socket blocking.
> 
> The API of qemu_recvv/qemu_sendv is slightly different from
> do_readv/do_writev because they do not handle coroutines.  It
> returns the number of bytes written before encountering an
> EAGAIN.  The specificity of yielding on EAGAIN is entirely in
> qemu-coroutine.c.
> 
> Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> 	Thanks for the review.  I checked with qemu-io that all of
> 
> 		readv -v 0 524288 (x8)
>                 readv -v 0 262144 (x16)
>                 readv -v 0 1024 (x4096)
>                 readv -v 0 1536 (x2730) 1024
>                 readv -v 0 1024 512 (x2730) 1024
> 
> 	work and produce the same output, while previously they would fail.
> 	Looks like it's hard to trigger the code just with qemu.
> 
>  block/sheepdog.c |  225 ++++++------------------------------------------------
>  cutils.c         |  103 +++++++++++++++++++++++++
>  qemu-common.h    |    3 +
>  qemu-coroutine.c |   70 +++++++++++++++++
>  qemu-coroutine.h |   26 ++++++
>  5 files changed, 225 insertions(+), 202 deletions(-)

Thanks, this passed qemu-iotests on Sheepdog.

Reviewed-by: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 04/12] nbd: add support for NBD_CMD_FLUSH
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 04/12] nbd: add support for NBD_CMD_FLUSH Paolo Bonzini
@ 2011-09-13 13:52   ` Kevin Wolf
  2011-09-13 15:13     ` Paolo Bonzini
  0 siblings, 1 reply; 42+ messages in thread
From: Kevin Wolf @ 2011-09-13 13:52 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

Am 08.09.2011 17:24, schrieb Paolo Bonzini:
> Note for the brace police: the style in this commit and the following
> is consistent with the rest of the file.  It is then fixed together with
> the introduction of coroutines.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block/nbd.c |   31 +++++++++++++++++++++++++++++++
>  nbd.c       |   14 +++++++++++++-
>  2 files changed, 44 insertions(+), 1 deletions(-)
> 
> diff --git a/block/nbd.c b/block/nbd.c
> index ffc57a9..4a195dc 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -237,6 +237,36 @@ static int nbd_write(BlockDriverState *bs, int64_t sector_num,
>      return 0;
>  }
>  
> +static int nbd_flush(BlockDriverState *bs)
> +{
> +    BDRVNBDState *s = bs->opaque;
> +    struct nbd_request request;
> +    struct nbd_reply reply;
> +
> +    if (!(s->nbdflags & NBD_FLAG_SEND_FLUSH)) {
> +        return 0;
> +    }
> +
> +    request.type = NBD_CMD_FLUSH;
> +    request.handle = (uint64_t)(intptr_t)bs;
> +    request.from = 0;
> +    request.len = 0;
> +
> +    if (nbd_send_request(s->sock, &request) == -1)
> +        return -errno;
> +
> +    if (nbd_receive_reply(s->sock, &reply) == -1)
> +        return -errno;
> +
> +    if (reply.error !=0)

Missing space (this is not for consistency, right?)

> @@ -682,6 +683,18 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
>          TRACE("Request type is DISCONNECT");
>          errno = 0;
>          return 1;
> +    case NBD_CMD_FLUSH:
> +        TRACE("Request type is FLUSH");
> +
> +        if (bdrv_flush(bs) == -1) {

bdrv_flush is supposed to return -errno, so please check for < 0. (I see
that raw-posix needs to be fixed, but other block drivers already return
error values other than -1)

Kevin

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 05/12] nbd: add support for NBD_CMD_FLAG_FUA
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 05/12] nbd: add support for NBD_CMD_FLAG_FUA Paolo Bonzini
@ 2011-09-13 13:55   ` Kevin Wolf
  0 siblings, 0 replies; 42+ messages in thread
From: Kevin Wolf @ 2011-09-13 13:55 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

Am 08.09.2011 17:24, schrieb Paolo Bonzini:
> The server can use it to issue a flush automatically after a
> write.  The client can also use it to mimic a write-through
> cache.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block/nbd.c |    8 ++++++++
>  nbd.c       |   13 +++++++++++--
>  2 files changed, 19 insertions(+), 2 deletions(-)

> @@ -674,6 +675,14 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
>              }
>  
>              *offset += request.len;
> +
> +            if (request.type & NBD_CMD_FLAG_FUA) {
> +                if (bdrv_flush(bs) == -1) {

Need to check for < 0 here as well.

Kevin

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 06/12] nbd: support NBD_CMD_TRIM in the server
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 06/12] nbd: support NBD_CMD_TRIM in the server Paolo Bonzini
@ 2011-09-13 13:58   ` Kevin Wolf
  2011-09-13 15:14     ` Paolo Bonzini
  2011-09-14 15:44   ` Christoph Hellwig
  1 sibling, 1 reply; 42+ messages in thread
From: Kevin Wolf @ 2011-09-13 13:58 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

Am 08.09.2011 17:24, schrieb Paolo Bonzini:
> Map it to bdrv_discard.  The server can now expose NBD_FLAG_SEND_TRIM.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block/nbd.c |   31 +++++++++++++++++++++++++++++++
>  nbd.c       |    9 ++++++++-
>  2 files changed, 39 insertions(+), 1 deletions(-)
> 
> diff --git a/block/nbd.c b/block/nbd.c
> index 5a7812c..964caa8 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -275,6 +275,36 @@ static int nbd_flush(BlockDriverState *bs)
>      return 0;
>  }
>  
> +static int nbd_discard(BlockDriverState *bs, int64_t sector_num,
> +                       int nb_sectors)
> +{
> +    BDRVNBDState *s = bs->opaque;
> +    struct nbd_request request;
> +    struct nbd_reply reply;
> +
> +    if (!(s->nbdflags & NBD_FLAG_SEND_TRIM)) {
> +        return 0;
> +    }
> +    request.type = NBD_CMD_TRIM;
> +    request.handle = (uint64_t)(intptr_t)bs;
> +    request.from = sector_num * 512;;
> +    request.len = nb_sectors * 512;
> +
> +    if (nbd_send_request(s->sock, &request) == -1)
> +        return -errno;
> +
> +    if (nbd_receive_reply(s->sock, &reply) == -1)
> +        return -errno;
> +
> +    if (reply.error !=0)
> +        return -reply.error;
> +
> +    if (reply.handle != request.handle)
> +        return -EIO;
> +
> +    return 0;
> +}
> +
>  static void nbd_close(BlockDriverState *bs)
>  {
>      BDRVNBDState *s = bs->opaque;
> @@ -299,6 +329,7 @@ static BlockDriver bdrv_nbd = {
>      .bdrv_write		= nbd_write,
>      .bdrv_close		= nbd_close,
>      .bdrv_flush		= nbd_flush,
> +    .bdrv_discard	= nbd_discard,
>      .bdrv_getlength	= nbd_getlength,
>      .protocol_name	= "nbd",
>  };
> diff --git a/nbd.c b/nbd.c
> index b65fb4a..f089904 100644
> --- a/nbd.c
> +++ b/nbd.c
> @@ -194,7 +194,7 @@ int nbd_negotiate(int csock, off_t size, uint32_t flags)
>      cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
>      cpu_to_be64w((uint64_t*)(buf + 16), size);
>      cpu_to_be32w((uint32_t*)(buf + 24),
> -                 flags | NBD_FLAG_HAS_FLAGS |
> +                 flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
>                   NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
>      memset(buf + 28, 0, 124);
>  
> @@ -703,6 +703,13 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
>          if (nbd_send_reply(csock, &reply) == -1)
>              return -1;
>          break;
> +    case NBD_CMD_TRIM:
> +        TRACE("Request type is TRIM");
> +        bdrv_discard(bs, (request.from + dev_offset) / 512,
> +                     request.len / 512);

Errors are completely ignored? Does the NBD protocol not allow to return
an error?

Kevin

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v2 09/12] sheepdog: move coroutine send/recv function to generic code
  2011-09-09  8:11     ` [Qemu-devel] [PATCH v2 " Paolo Bonzini
  2011-09-13  0:28       ` MORITA Kazutaka
@ 2011-09-13 14:14       ` Kevin Wolf
  2011-09-13 15:16         ` Paolo Bonzini
  1 sibling, 1 reply; 42+ messages in thread
From: Kevin Wolf @ 2011-09-13 14:14 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, MORITA Kazutaka

Am 09.09.2011 10:11, schrieb Paolo Bonzini:
> Outside coroutines, avoid busy waiting on EAGAIN by temporarily
> making the socket blocking.
> 
> The API of qemu_recvv/qemu_sendv is slightly different from
> do_readv/do_writev because they do not handle coroutines.  It
> returns the number of bytes written before encountering an
> EAGAIN.  The specificity of yielding on EAGAIN is entirely in
> qemu-coroutine.c.
> 
> Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> 	Thanks for the review.  I checked with qemu-io that all of
> 
> 		readv -v 0 524288 (x8)
>                 readv -v 0 262144 (x16)
>                 readv -v 0 1024 (x4096)
>                 readv -v 0 1536 (x2730) 1024
>                 readv -v 0 1024 512 (x2730) 1024
> 
> 	work and produce the same output, while previously they would fail.
> 	Looks like it's hard to trigger the code just with qemu.
> 
>  block/sheepdog.c |  225 ++++++------------------------------------------------
>  cutils.c         |  103 +++++++++++++++++++++++++
>  qemu-common.h    |    3 +
>  qemu-coroutine.c |   70 +++++++++++++++++
>  qemu-coroutine.h |   26 ++++++

Can we move the code somewhere else? This is not core coroutine
infrastructure. I would suggest qemu_socket.h/qemu-sockets.c.

Kevin

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 04/12] nbd: add support for NBD_CMD_FLUSH
  2011-09-13 13:52   ` Kevin Wolf
@ 2011-09-13 15:13     ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-13 15:13 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-devel

On 09/13/2011 03:52 PM, Kevin Wolf wrote:
>> >  +
>> >  +    if (reply.error !=0)
>
> Missing space (this is not for consistency, right?)

Well, cut and paste implies consistency. :)  Will fix.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 06/12] nbd: support NBD_CMD_TRIM in the server
  2011-09-13 13:58   ` Kevin Wolf
@ 2011-09-13 15:14     ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-13 15:14 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-devel

On 09/13/2011 03:58 PM, Kevin Wolf wrote:
>> >  +    case NBD_CMD_TRIM:
>> >  +        TRACE("Request type is TRIM");
>> >  +        bdrv_discard(bs, (request.from + dev_offset) / 512,
>> >  +                     request.len / 512);
>
> Errors are completely ignored? Does the NBD protocol not allow to return
> an error?

Actually it does, will fix.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v2 09/12] sheepdog: move coroutine send/recv function to generic code
  2011-09-13 14:14       ` Kevin Wolf
@ 2011-09-13 15:16         ` Paolo Bonzini
  2011-09-13 15:36           ` Kevin Wolf
  0 siblings, 1 reply; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-13 15:16 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-devel, MORITA Kazutaka

On 09/13/2011 04:14 PM, Kevin Wolf wrote:
>> >    block/sheepdog.c |  225 ++++++------------------------------------------------
>> >    cutils.c         |  103 +++++++++++++++++++++++++
>> >    qemu-common.h    |    3 +
>> >    qemu-coroutine.c |   70 +++++++++++++++++
>> >    qemu-coroutine.h |   26 ++++++
>
> Can we move the code somewhere else? This is not core coroutine
> infrastructure. I would suggest qemu_socket.h/qemu-sockets.c.

It's not really socket-specific either (it uses recv/send only because 
of Windows brokenness---it could use read/write if it weren't for that). 
  I hoped that sooner or later it could become a qemu_co_readv/writev, 
hence the choice of qemu-coroutine.c.

Paolo

ps: I also hope that the Earth will start spinning slower and will give 
me 32 hour days, so just tell me if you really want that outside 
qemu-coroutine.c.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v2 09/12] sheepdog: move coroutine send/recv function to generic code
  2011-09-13 15:16         ` Paolo Bonzini
@ 2011-09-13 15:36           ` Kevin Wolf
  2011-09-13 15:38             ` Paolo Bonzini
  0 siblings, 1 reply; 42+ messages in thread
From: Kevin Wolf @ 2011-09-13 15:36 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, MORITA Kazutaka

Am 13.09.2011 17:16, schrieb Paolo Bonzini:
> On 09/13/2011 04:14 PM, Kevin Wolf wrote:
>>>>    block/sheepdog.c |  225 ++++++------------------------------------------------
>>>>    cutils.c         |  103 +++++++++++++++++++++++++
>>>>    qemu-common.h    |    3 +
>>>>    qemu-coroutine.c |   70 +++++++++++++++++
>>>>    qemu-coroutine.h |   26 ++++++
>>
>> Can we move the code somewhere else? This is not core coroutine
>> infrastructure. I would suggest qemu_socket.h/qemu-sockets.c.
> 
> It's not really socket-specific either (it uses recv/send only because 
> of Windows brokenness---it could use read/write if it wasn't for that). 
>   I hoped sooner or later it could become a qemu_co_readv/writev, hence 
> the choice of qemu-coroutine.c.
> 
> Paolo
> 
> ps: I also hope that the Earth will start spinning slower and will give 
> me 32 hour days, so just tell me if you really want that outside 
> qemu-coroutine.c.

Yes, I do want it outside qemu-coroutine.c.

If you prefer putting it next to qemu_write_full() and friends rather
than into the sockets file, feel free to do that.

Kevin

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v2 09/12] sheepdog: move coroutine send/recv function to generic code
  2011-09-13 15:36           ` Kevin Wolf
@ 2011-09-13 15:38             ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-13 15:38 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-devel, MORITA Kazutaka

On 09/13/2011 05:36 PM, Kevin Wolf wrote:
> If you prefer putting it next to qemu_write_full() and friends rather
> than into the sockets file, feel free to do that.

Yes, that makes good sense.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 00/12] nbd improvements
  2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
                   ` (12 preceding siblings ...)
  2011-09-09  9:00 ` [Qemu-devel] [PATCH 00/12] nbd improvements Kevin Wolf
@ 2011-09-14  9:50 ` Kevin Wolf
  13 siblings, 0 replies; 42+ messages in thread
From: Kevin Wolf @ 2011-09-14  9:50 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

Am 08.09.2011 17:24, schrieb Paolo Bonzini:
> I find nbd quite useful to test migration, but it is limited:
> it can only do synchronous operation, it is not safe because it
> does not support flush, and it has no discard either.  qemu-nbd
> is also limited to 1MB requests, and the nbd block driver does
> not take this into account.
> 
> Luckily, flush/FUA support is being worked out by upstream,
> and discard can also be added with the same framework (patches
> 1 to 6).
> 
> Asynchronous support is also very similar to what sheepdog is
> already doing (patches 7 to 12).
> 
> Paolo Bonzini (12):
>   nbd: support feature negotiation
>   nbd: sync API definitions with upstream
>   nbd: support NBD_SET_FLAGS ioctl
>   nbd: add support for NBD_CMD_FLUSH
>   nbd: add support for NBD_CMD_FLAG_FUA
>   nbd: support NBD_CMD_TRIM in the server
>   sheepdog: add coroutine_fn markers
>   add socket_set_block
>   sheepdog: move coroutine send/recv function to generic code
>   block: add bdrv_co_flush support
>   nbd: switch to asynchronous operation
>   nbd: split requests

Okay, completed the review for this series now. I think if you consider
the comments posted so far for v2 we should be good.

Kevin

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 06/12] nbd: support NBD_CMD_TRIM in the server
  2011-09-08 15:24 ` [Qemu-devel] [PATCH 06/12] nbd: support NBD_CMD_TRIM in the server Paolo Bonzini
  2011-09-13 13:58   ` Kevin Wolf
@ 2011-09-14 15:44   ` Christoph Hellwig
  2011-09-14 16:25     ` Paolo Bonzini
  1 sibling, 1 reply; 42+ messages in thread
From: Christoph Hellwig @ 2011-09-14 15:44 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

On Thu, Sep 08, 2011 at 05:24:59PM +0200, Paolo Bonzini wrote:
> Map it to bdrv_discard.  The server can now expose NBD_FLAG_SEND_TRIM.

Note that discard support without a way to communicate the alignment/size
requirements, and without the discard_zeroes_data flag is pretty much
useless.  Can you work with the upstream developers to make sure it's
actually useful?

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH 06/12] nbd: support NBD_CMD_TRIM in the server
  2011-09-14 15:44   ` Christoph Hellwig
@ 2011-09-14 16:25     ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2011-09-14 16:25 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: qemu-devel

On 09/14/2011 05:44 PM, Christoph Hellwig wrote:
>> Map it to bdrv_discard.  The server can now expose NBD_FLAG_SEND_TRIM.
>
> Note that discard support without a way to communicate the alignment/size
> requirements,

Yep, especially because alignment can be as small as 512 for sparse raw, 
and as high as 65536 for qcow2...

> and without the discard_zeroes_data flag is pretty much
> useless.

... but right now in QEMU it is most useful with qcow2, so 
!discard_zeroes_data is pretty much the best you can do.

In general, QEMU's block layer does not really do much to pass 
information on discard features, which is why I didn't think much about 
these bits.  I'm interested in making NBD as featureful as the QEMU 
block layer, but beyond that not much. :)

Still you're obviously right, I'll talk to upstream.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2011-09-14 16:26 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-08 15:24 [Qemu-devel] [PATCH 00/12] nbd improvements Paolo Bonzini
2011-09-08 15:24 ` [Qemu-devel] [PATCH 01/12] nbd: support feature negotiation Paolo Bonzini
2011-09-08 15:24 ` [Qemu-devel] [PATCH 02/12] nbd: sync API definitions with upstream Paolo Bonzini
2011-09-12 14:15   ` Kevin Wolf
2011-09-12 15:08     ` Paolo Bonzini
2011-09-08 15:24 ` [Qemu-devel] [PATCH 03/12] nbd: support NBD_SET_FLAGS ioctl Paolo Bonzini
2011-09-08 15:24 ` [Qemu-devel] [PATCH 04/12] nbd: add support for NBD_CMD_FLUSH Paolo Bonzini
2011-09-13 13:52   ` Kevin Wolf
2011-09-13 15:13     ` Paolo Bonzini
2011-09-08 15:24 ` [Qemu-devel] [PATCH 05/12] nbd: add support for NBD_CMD_FLAG_FUA Paolo Bonzini
2011-09-13 13:55   ` Kevin Wolf
2011-09-08 15:24 ` [Qemu-devel] [PATCH 06/12] nbd: support NBD_CMD_TRIM in the server Paolo Bonzini
2011-09-13 13:58   ` Kevin Wolf
2011-09-13 15:14     ` Paolo Bonzini
2011-09-14 15:44   ` Christoph Hellwig
2011-09-14 16:25     ` Paolo Bonzini
2011-09-08 15:25 ` [Qemu-devel] [PATCH 07/12] sheepdog: add coroutine_fn markers Paolo Bonzini
2011-09-08 15:25 ` [Qemu-devel] [PATCH 08/12] add socket_set_block Paolo Bonzini
2011-09-08 15:25 ` [Qemu-devel] [PATCH 09/12] sheepdog: move coroutine send/recv function to generic code Paolo Bonzini
2011-09-09  4:53   ` MORITA Kazutaka
2011-09-09  8:11     ` [Qemu-devel] [PATCH v2 " Paolo Bonzini
2011-09-13  0:28       ` MORITA Kazutaka
2011-09-13 14:14       ` Kevin Wolf
2011-09-13 15:16         ` Paolo Bonzini
2011-09-13 15:36           ` Kevin Wolf
2011-09-13 15:38             ` Paolo Bonzini
2011-09-08 15:25 ` [Qemu-devel] [PATCH 10/12] block: add bdrv_co_flush support Paolo Bonzini
2011-09-08 15:25 ` [Qemu-devel] [PATCH 11/12] nbd: switch to asynchronous operation Paolo Bonzini
2011-09-09 14:52   ` Nicholas Thomas
2011-09-09 15:03     ` Paolo Bonzini
2011-09-09 15:34       ` Paolo Bonzini
2011-09-08 15:25 ` [Qemu-devel] [PATCH 12/12] nbd: split requests Paolo Bonzini
2011-09-09 14:52   ` Nicholas Thomas
2011-09-09 15:33     ` Paolo Bonzini
2011-09-09  9:00 ` [Qemu-devel] [PATCH 00/12] nbd improvements Kevin Wolf
2011-09-09 10:29   ` Paolo Bonzini
2011-09-09 10:42     ` Kevin Wolf
2011-09-09 10:50     ` Nicholas Thomas
2011-09-09 11:00       ` Paolo Bonzini
2011-09-09 11:04       ` Kevin Wolf
2011-09-09 14:51         ` Nicholas Thomas
2011-09-14  9:50 ` Kevin Wolf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.