All of lore.kernel.org
 help / color / mirror / Atom feed
* nbd block device backend - 'improvements'
@ 2011-02-14 17:28 Nicholas Thomas
  2011-02-15  8:40 ` Avi Kivity
  0 siblings, 1 reply; 2+ messages in thread
From: Nicholas Thomas @ 2011-02-14 17:28 UTC (permalink / raw)
  To: kvm

[-- Attachment #1: Type: text/plain, Size: 1375 bytes --]

Hi,

I've been doing some work with /block/nbd.c with the aim of improving
its behaviour when the NBD server is inaccessible or goes away.

Current behaviour is to exit on startup if connecting to the NBD server
fails for any reason. If the connection dies  once KVM is started, the
VM stays up but reads and writes fail. No attempt to re-establish the
connection is made. 

I've written a patch that changes the behaviour - instead of exiting at
startup, we wait for the NBD connection to be established, and we hang
on reads and writes until the connection is re-established.

I'm interested in getting the changes merged upstream, so I thought I'd
get in early and ask if you'd be interested in the patch, in principle;
whether the old behaviour would need to be preserved, making the new
behaviour accessible via a config option ("-drive
file=nbd:127.0.0.1:5000:retry=forever,..." ?); and whether I'm going
about the changes in a sane way (I've attached the current version of
the patch).

Another thing I've noticed is that the nbd library ( /nbd.c ) is not
IPv6-compatible ( "-drive file=nbd:\:\:1:5000", for instance ) - I don't
have a patch for that yet, but I'm going to need to write one :) -
presumably you'd like that merging upstream too (and I should make the
library use the functions provided in qemu_socket.h) ?

Regards,
-- 
Nick Thomas
Bytemark Computing


[-- Attachment #2: 01-make-nbd-retry.patch --]
[-- Type: text/x-patch, Size: 4761 bytes --]

diff --git a/block/nbd.c b/block/nbd.c
index c8dc763..87da07e 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -39,9 +39,11 @@ typedef struct BDRVNBDState {
     int sock;
     off_t size;
     size_t blocksize;
+    char *filename;
 } BDRVNBDState;
 
-static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
+
+static int nbd_open(BlockDriverState *bs)
 {
     BDRVNBDState *s = bs->opaque;
     uint32_t nbdflags;
@@ -56,7 +58,7 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
     int ret;
     int err = -EINVAL;
 
-    file = qemu_strdup(filename);
+    file = qemu_strdup(s->filename);
 
     name = strstr(file, EN_OPTSTR);
     if (name) {
@@ -121,32 +123,62 @@ out:
     return err;
 }
 
+// Puts the filename into the driver state and calls nbd_open - hanging until
+// it is successful.
+static int nbd_setup(BlockDriverState *bs, const char* filename, int flags)
+{
+    BDRVNBDState *s = bs->opaque;
+    int err = 1;
+
+    s->filename = qemu_strdup(filename);
+    while (err != 0)
+    {
+        err = nbd_open(bs);
+        // Avoid tight loops
+        if (err != 0)
+            sleep(1);
+    }
+
+    return err;
+}
+
 static int nbd_read(BlockDriverState *bs, int64_t sector_num,
                     uint8_t *buf, int nb_sectors)
 {
     BDRVNBDState *s = bs->opaque;
     struct nbd_request request;
     struct nbd_reply reply;
+    bool success = false;
 
     request.type = NBD_CMD_READ;
     request.handle = (uint64_t)(intptr_t)bs;
-    request.from = sector_num * 512;;
+    request.from = sector_num * 512;
     request.len = nb_sectors * 512;
 
-    if (nbd_send_request(s->sock, &request) == -1)
-        return -errno;
+    while (success == false)
+    {
+        if ( (nbd_send_request(s->sock, &request) == -1) ||
+             (nbd_receive_reply(s->sock, &reply) == -1)     )
+        {
+            // We hang here until the TCP session is established
+            close(s->sock);
+            while(nbd_open(bs) != 0)
+                sleep(1);
+            continue;
+        }
 
-    if (nbd_receive_reply(s->sock, &reply) == -1)
-        return -errno;
+        if (reply.error !=0)
+            return -reply.error;
 
-    if (reply.error !=0)
-        return -reply.error;
+        if (reply.handle != request.handle)
+            return -EIO;
 
-    if (reply.handle != request.handle)
-        return -EIO;
+        // If reading the actual data fails, we retry the whole request
+        if (nbd_wr_sync(s->sock, buf, request.len, 1) != request.len)
+            continue;
 
-    if (nbd_wr_sync(s->sock, buf, request.len, 1) != request.len)
-        return -EIO;
+        success = true;
+    }
 
     return 0;
 }
@@ -157,26 +189,39 @@ static int nbd_write(BlockDriverState *bs, int64_t sector_num,
     BDRVNBDState *s = bs->opaque;
     struct nbd_request request;
     struct nbd_reply reply;
+    bool success = false;
 
     request.type = NBD_CMD_WRITE;
     request.handle = (uint64_t)(intptr_t)bs;
     request.from = sector_num * 512;;
     request.len = nb_sectors * 512;
 
-    if (nbd_send_request(s->sock, &request) == -1)
-        return -errno;
+    while (success == false)
+    {
+        if ( (nbd_send_request(s->sock, &request) == -1) ||
+             (nbd_wr_sync(s->sock, (uint8_t*)buf, request.len, 0) != request.len) )
+        {
+            // We hang here until the TCP session is established
+            close(s->sock);
+            while(nbd_open(bs) != 0)
+                sleep(1);
+            continue;
+        }
+
+        // We didn't get a reply from the write, so try again
+        if (nbd_receive_reply(s->sock, &reply) == -1)
+            continue;
 
-    if (nbd_wr_sync(s->sock, (uint8_t*)buf, request.len, 0) != request.len)
-        return -EIO;
+        // Problem with the response itself
+        if (reply.error !=0)
+            return -reply.error;
 
-    if (nbd_receive_reply(s->sock, &reply) == -1)
-        return -errno;
+        if (reply.handle != request.handle)
+            return -EIO;
 
-    if (reply.error !=0)
-        return -reply.error;
+        success = true;
+    }
 
-    if (reply.handle != request.handle)
-        return -EIO;
 
     return 0;
 }
@@ -191,6 +236,7 @@ static void nbd_close(BlockDriverState *bs)
     request.from = 0;
     request.len = 0;
     nbd_send_request(s->sock, &request);
+    qemu_free(s->filename);
 
     close(s->sock);
 }
@@ -205,7 +251,7 @@ static int64_t nbd_getlength(BlockDriverState *bs)
 static BlockDriver bdrv_nbd = {
     .format_name	= "nbd",
     .instance_size	= sizeof(BDRVNBDState),
-    .bdrv_file_open	= nbd_open,
+    .bdrv_file_open	= nbd_setup,
     .bdrv_read		= nbd_read,
     .bdrv_write		= nbd_write,
     .bdrv_close		= nbd_close,

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: nbd block device backend - 'improvements'
  2011-02-14 17:28 nbd block device backend - 'improvements' Nicholas Thomas
@ 2011-02-15  8:40 ` Avi Kivity
  0 siblings, 0 replies; 2+ messages in thread
From: Avi Kivity @ 2011-02-15  8:40 UTC (permalink / raw)
  To: Nicholas Thomas; +Cc: kvm

On 02/14/2011 07:28 PM, Nicholas Thomas wrote:
> I'm interested in getting the changes merged upstream, so I thought I'd
> get in early and ask if you'd be interested in the patch, in principle;
> whether the old behaviour would need to be preserved, making the new
> behaviour accessible via a config option ("-drive
> file=nbd:127.0.0.1:5000:retry=forever,..." ?); and whether I'm going
> about the changes in a sane way (I've attached the current version of
> the patch).
>

Upstream is qemu-devel@nongnu.org, where qemu is developed.  In 
particular Kevin Wolf <kwolf@redhat.com>, the block layer maintainer.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2011-02-15  8:41 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-02-14 17:28 nbd block device backend - 'improvements' Nicholas Thomas
2011-02-15  8:40 ` Avi Kivity

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.