All of lore.kernel.org
 help / color / mirror / Atom feed
From: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
To: Kevin Wolf <kwolf@redhat.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	qemu-devel@nongnu.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	sheepdog@lists.wpkg.org, Liu Yuan <namei.unix@gmail.com>
Subject: [Qemu-devel] [PATCH v3 07/10] sheepdog: try to reconnect to sheepdog after network error
Date: Thu, 25 Jul 2013 17:32:02 +0900	[thread overview]
Message-ID: <1374741125-31859-8-git-send-email-morita.kazutaka@lab.ntt.co.jp> (raw)
In-Reply-To: <1374741125-31859-1-git-send-email-morita.kazutaka@lab.ntt.co.jp>

This introduces a failed request queue and links all the inflight
requests to the list after network error happens.  After QEMU
reconnects to the sheepdog server successfully, the sheepdog block
driver will retry all the requests in the failed queue.

Signed-off-by: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
---
 block/sheepdog.c | 72 ++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 57 insertions(+), 15 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 7b22816..43a6feb 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -318,8 +318,11 @@ typedef struct BDRVSheepdogState {
     Coroutine *co_recv;
 
     uint32_t aioreq_seq_num;
+
+    /* Every aio request must be linked to either of these queues. */
     QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
     QLIST_HEAD(pending_aio_head, AIOReq) pending_aio_head;
+    QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
 } BDRVSheepdogState;
 
 static const char * sd_strerror(int err)
@@ -613,6 +616,8 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
                            enum AIOCBState aiocb_type);
 static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
 static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag);
+static int get_sheep_fd(BDRVSheepdogState *s);
+static void co_write_request(void *opaque);
 
 static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid)
 {
@@ -654,6 +659,44 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
     }
 }
 
+static coroutine_fn void reconnect_to_sdog(void *opaque)
+{
+    BDRVSheepdogState *s = opaque;
+    AIOReq *aio_req, *next;
+
+    qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
+    close(s->fd);
+    s->fd = -1;
+
+    /* Wait for outstanding write requests to be completed. */
+    while (s->co_send != NULL) {
+        co_write_request(opaque);
+    }
+
+    /* Try to reconnect the sheepdog server every one second. */
+    while (s->fd < 0) {
+        s->fd = get_sheep_fd(s);
+        if (s->fd < 0) {
+            dprintf("Wait for connection to be established\n");
+            co_aio_sleep_ns(1000000000ULL);
+        }
+    };
+
+    /* Move all the inflight requests to the failed queue. */
+    QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) {
+        QLIST_REMOVE(aio_req, aio_siblings);
+        QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings);
+    }
+
+    /* Resend all the failed aio requests. */
+    while (!QLIST_EMPTY(&s->failed_aio_head)) {
+        aio_req = QLIST_FIRST(&s->failed_aio_head);
+        QLIST_REMOVE(aio_req, aio_siblings);
+        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
+        resend_aioreq(s, aio_req);
+    }
+}
+
 /*
  * Receive responses of the I/O requests.
  *
@@ -670,15 +713,11 @@ static void coroutine_fn aio_read_response(void *opaque)
     SheepdogAIOCB *acb;
     uint64_t idx;
 
-    if (QLIST_EMPTY(&s->inflight_aio_head)) {
-        goto out;
-    }
-
     /* read a header */
     ret = qemu_co_recv(fd, &rsp, sizeof(rsp));
     if (ret != sizeof(rsp)) {
         error_report("failed to get the header, %s", strerror(errno));
-        goto out;
+        goto err;
     }
 
     /* find the right aio_req from the inflight aio list */
@@ -689,7 +728,7 @@ static void coroutine_fn aio_read_response(void *opaque)
     }
     if (!aio_req) {
         error_report("cannot find aio_req %x", rsp.id);
-        goto out;
+        goto err;
     }
 
     acb = aio_req->aiocb;
@@ -729,7 +768,7 @@ static void coroutine_fn aio_read_response(void *opaque)
                             aio_req->iov_offset, rsp.data_length);
         if (ret != rsp.data_length) {
             error_report("failed to get the data, %s", strerror(errno));
-            goto out;
+            goto err;
         }
         break;
     case AIOCB_FLUSH_CACHE:
@@ -763,10 +802,9 @@ static void coroutine_fn aio_read_response(void *opaque)
         if (s->inode.vdi_id == oid_to_vid(aio_req->oid)) {
             ret = reload_inode(s, 0, "");
             if (ret < 0) {
-                goto out;
+                goto err;
             }
         }
-
         if (is_data_obj(aio_req->oid)) {
             aio_req->oid = vid_to_data_oid(s->inode.vdi_id,
                                            data_oid_to_idx(aio_req->oid));
@@ -794,6 +832,10 @@ static void coroutine_fn aio_read_response(void *opaque)
     }
 out:
     s->co_recv = NULL;
+    return;
+err:
+    s->co_recv = NULL;
+    reconnect_to_sdog(opaque);
 }
 
 static void co_read_response(void *opaque)
@@ -819,7 +861,8 @@ static int aio_flush_request(void *opaque)
     BDRVSheepdogState *s = opaque;
 
     return !QLIST_EMPTY(&s->inflight_aio_head) ||
-        !QLIST_EMPTY(&s->pending_aio_head);
+        !QLIST_EMPTY(&s->pending_aio_head) ||
+        !QLIST_EMPTY(&s->failed_aio_head);
 }
 
 /*
@@ -1094,23 +1137,21 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
     /* send a header */
     ret = qemu_co_send(s->fd, &hdr, sizeof(hdr));
     if (ret != sizeof(hdr)) {
-        qemu_co_mutex_unlock(&s->lock);
         error_report("failed to send a req, %s", strerror(errno));
-        return -errno;
+        goto out;
     }
 
     if (wlen) {
         ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen);
         if (ret != wlen) {
-            qemu_co_mutex_unlock(&s->lock);
             error_report("failed to send a data, %s", strerror(errno));
-            return -errno;
         }
     }
-
+out:
     socket_set_cork(s->fd, 0);
     qemu_aio_set_fd_handler(s->fd, co_read_response, NULL,
                             aio_flush_request, s);
+    s->co_send = NULL;
     qemu_co_mutex_unlock(&s->lock);
 
     return 0;
@@ -1300,6 +1341,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags)
 
     QLIST_INIT(&s->inflight_aio_head);
     QLIST_INIT(&s->pending_aio_head);
+    QLIST_INIT(&s->failed_aio_head);
     s->fd = -1;
 
     memset(vdi, 0, sizeof(vdi));
-- 
1.8.1.3.566.gaa39828

  parent reply	other threads:[~2013-07-25  8:33 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-07-25  8:31 [Qemu-devel] [PATCH v3 00/10] sheepdog: reconnect server after connection failure MORITA Kazutaka
2013-07-25  8:31 ` [Qemu-devel] [PATCH v3 01/10] ignore SIGPIPE in qemu-img and qemu-io MORITA Kazutaka
2013-07-25  8:31 ` [Qemu-devel] [PATCH v3 02/10] iov: handle EOF in iov_send_recv MORITA Kazutaka
2013-07-25  8:31 ` [Qemu-devel] [PATCH v3 03/10] sheepdog: check return values of qemu_co_recv/send correctly MORITA Kazutaka
2013-07-25  8:46   ` Liu Yuan
2013-07-25  8:53     ` [Qemu-devel] [sheepdog] " MORITA Kazutaka
2013-07-25  8:56     ` [Qemu-devel] " Liu Yuan
2013-07-25  8:31 ` [Qemu-devel] [PATCH v3 04/10] sheepdog: handle vdi objects in resend_aio_req MORITA Kazutaka
2013-07-25  8:32 ` [Qemu-devel] [PATCH v3 05/10] sheepdog: reload inode outside of resend_aioreq MORITA Kazutaka
2013-07-25  8:32 ` [Qemu-devel] [PATCH v3 06/10] coroutine: add co_aio_sleep_ns() to allow sleep in block drivers MORITA Kazutaka
2013-07-25  8:32 ` MORITA Kazutaka [this message]
2013-07-25  9:13   ` [Qemu-devel] [PATCH v3 07/10] sheepdog: try to reconnect to sheepdog after network error Liu Yuan
2013-07-25 12:53     ` [Qemu-devel] [sheepdog] " MORITA Kazutaka
2013-07-25 13:20       ` Liu Yuan
2013-07-26  5:53         ` MORITA Kazutaka
2013-07-25  8:32 ` [Qemu-devel] [PATCH v3 08/10] sheepdog: make add_aio_request and send_aioreq void functions MORITA Kazutaka
2013-07-25  8:32 ` [Qemu-devel] [PATCH v3 09/10] sheepdog: cancel aio requests if possible MORITA Kazutaka
2013-07-25  9:04   ` Liu Yuan
2013-07-25 12:54     ` [Qemu-devel] [sheepdog] " MORITA Kazutaka
2013-07-25  8:32 ` [Qemu-devel] [PATCH v3 10/10] sheepdog: check simultaneous create in resend_aioreq MORITA Kazutaka

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1374741125-31859-8-git-send-email-morita.kazutaka@lab.ntt.co.jp \
    --to=morita.kazutaka@lab.ntt.co.jp \
    --cc=kwolf@redhat.com \
    --cc=namei.unix@gmail.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=sheepdog@lists.wpkg.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.