* [PATCH] libxc: succeed silently on restore
@ 2010-09-02 16:14 Ian Campbell
  2010-09-02 17:01 ` Ian Campbell
  2010-09-02 17:14 ` Ian Jackson
  0 siblings, 2 replies; 9+ messages in thread
From: Ian Campbell @ 2010-09-02 16:14 UTC (permalink / raw)
  To: xen-devel; +Cc: Brendan Cully

# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1283444053 -3600
# Node ID 5ad37819cddd19a270659d50ddf48da27ef987f6
# Parent  5906bc603a3ce47ca01b33bef7d952af822459fd
libxc: succeed silently on restore.

When doing a non-checkpoint restore we should expect an EOF at the end
of the restore and not log errors:
  xc: error: 0-length read: Internal error
  xc: error: read_exact_timed failed (read rc: 0, errno: 0): Internal error
  xc: error: Error when reading batch size (0 = Success): Internal error
  xc: error: error when buffering batch, finishing (0 = Success): Internal error
(I especially like the implied "Error == Success" ;-))

We allow for an EOF precisely at the start of a new batch only when
the restore is already marked as completed. In this case rdexact will
fail silently and propagate the specific failure upwards so that
higher levels can remain silent as well.

This is hackier than I would like, but short of a massive refactoring
it is the best I could come up with.

I think this does something sensible for Remus too, although I've not
actually tried it. It's not clear that the select timeout case
shouldn't be logged in a less scary way too, since it isn't really an
error in the restore (although presumably it represents some sort of
failure on the active VM on the other end of the pipe).

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Brendan Cully <brendan@cs.ubc.ca>

diff -r 5906bc603a3c -r 5ad37819cddd tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c	Thu Sep 02 14:38:44 2010 +0100
+++ b/tools/libxc/xc_domain_restore.c	Thu Sep 02 17:14:13 2010 +0100
@@ -49,7 +49,7 @@ struct restore_ctx {
 
 #ifndef __MINIOS__
 static ssize_t rdexact(struct xc_interface *xch, struct restore_ctx *ctx,
-                       int fd, void* buf, size_t size)
+                       int fd, void* buf, size_t size, int eof)
 {
     size_t offset = 0;
     ssize_t len;
@@ -69,7 +69,7 @@ static ssize_t rdexact(struct xc_interfa
             if ( len == -1 && errno == EINTR )
                 continue;
             if ( !FD_ISSET(fd, &rfds) ) {
-                ERROR("read_exact_timed failed (select returned %zd)", len);
+                ERROR("rdexact failed (select returned %zd)", len);
                 errno = ETIMEDOUT;
                 return -1;
             }
@@ -79,11 +79,14 @@ static ssize_t rdexact(struct xc_interfa
         if ( (len == -1) && ((errno == EINTR) || (errno == EAGAIN)) )
             continue;
         if ( len == 0 ) {
+            /* If we are expecting EOF then fail silently with errno = 0 */
+            if ( offset == 0 && eof )
+                return -2;
             ERROR("0-length read");
             errno = 0;
         }
         if ( len <= 0 ) {
-            ERROR("read_exact_timed failed (read rc: %d, errno: %d)", len, errno);
+            ERROR("rdexact failed (read rc: %d, errno: %d)", len, errno);
             return -1;
         }
         offset += len;
@@ -92,9 +95,11 @@ static ssize_t rdexact(struct xc_interfa
     return 0;
 }
 
-#define RDEXACT(fd,buf,size) rdexact(xch, ctx, fd, buf, size)
+#define RDEXACT(fd,buf,size) rdexact(xch, ctx, fd, buf, size, 0)
+#define RDEXACT_EOF(fd,buf,size) rdexact(xch, ctx, fd, buf, size, ctx->completed)
 #else
 #define RDEXACT read_exact
+#define RDEXACT_EOF read_exact
 #endif
 /*
 ** In the state file (or during transfer), all page-table pages are
@@ -671,11 +676,13 @@ static int pagebuf_get_one(xc_interface 
 {
     int count, countpages, oldcount, i;
     void* ptmp;
+    int rc;
 
-    if ( RDEXACT(fd, &count, sizeof(count)) )
+    if ( (rc = RDEXACT_EOF(fd, &count, sizeof(count))) )
     {
-        PERROR("Error when reading batch size");
-        return -1;
+        if ( rc == -1 )
+            PERROR("Error when reading batch size");
+        return rc;
     }
 
     // DPRINTF("reading batch of %d pages\n", count);
@@ -1289,8 +1296,9 @@ int xc_domain_restore(xc_interface *xch,
 
     // DPRINTF("Buffered checkpoint\n");
 
-    if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
-        PERROR("error when buffering batch, finishing");
+    if ( (frc = pagebuf_get(xch, ctx, &pagebuf, io_fd, dom)) ) {
+        if ( frc == -1 )
+            PERROR("error when buffering batch, finishing");
         goto finish;
     }
     memset(&tmptail, 0, sizeof(tmptail));


* Re: [PATCH] libxc: succeed silently on restore
  2010-09-02 16:14 [PATCH] libxc: succeed silently on restore Ian Campbell
@ 2010-09-02 17:01 ` Ian Campbell
  2010-09-02 17:07   ` Ian Jackson
  2010-09-02 18:16   ` Brendan Cully
  2010-09-02 17:14 ` Ian Jackson
  1 sibling, 2 replies; 9+ messages in thread
From: Ian Campbell @ 2010-09-02 17:01 UTC (permalink / raw)
  To: xen-devel, Ian Jackson; +Cc: Brendan Cully

So it turns out that there is a similar issue on migration:
        xc: Saving memory: iter 3 (last sent 37 skipped 0): 0/32768    0%xc: error: rdexact failed (select returned 0): Internal error
        xc: error: Error when reading batch size (110 = Connection timed out): Internal error
        xc: error: error when buffering batch, finishing (110 = Connection timed out): Internal error

I'm not so sure what can be done about this case: the way
xc_domain_restore is (currently) designed, it relies on the saving end
closing its FD when it is done in order to generate an EOF at the
receiver end, signalling the end of the migration.

The xl migration protocol has a postamble which prevents us from
closing the FD, so instead what happens is that the sender finishes the
save and then sits waiting for the ACK from the receiver, and the
receiver hits the Remus heartbeat timeout, which causes us to continue.
This isn't ideal from the downtime point of view, nor from a general
design POV.

Perhaps we should insert an explicit done marker into the xc save
protocol, which would be appended in the non-checkpoint case? Only the
save end is aware of whether the migration is a checkpoint or not (and
only implicitly, via callbacks->checkpoint <> NULL), but that is OK, I
think.
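
For illustration only, a sketch of how the receive side might recognise
such a marker; the SKETCH_LAST_CHECKPOINT name, its value and the helper
are invented here and are not part of the current save format:

/* Sketch only: a made-up marker value, not an existing save-format flag. */
#define SKETCH_LAST_CHECKPOINT (-1000)

/*
 * Would sit roughly where pagebuf_get_one() has just read the batch-size
 * word: a match means the sender has declared the stream finished, so the
 * receiver can stop waiting for further checkpoints instead of relying on
 * EOF or the heartbeat timeout.
 */
static int sketch_is_last_checkpoint(int count, int *stream_finished)
{
    if ( count == SKETCH_LAST_CHECKPOINT )
    {
        *stream_finished = 1;
        return 1;   /* marker consumed; no page data follows it */
    }
    return 0;       /* an ordinary batch (or some other flag value) */
}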

Ian.


* Re: [PATCH] libxc: succeed silently on restore
  2010-09-02 17:01 ` Ian Campbell
@ 2010-09-02 17:07   ` Ian Jackson
  2010-09-02 18:26     ` Ian Campbell
  2010-09-02 18:16   ` Brendan Cully
  1 sibling, 1 reply; 9+ messages in thread
From: Ian Jackson @ 2010-09-02 17:07 UTC (permalink / raw)
  To: Ian Campbell; +Cc: Ian Jackson, Brendan Cully, xen-devel

Ian Campbell writes ("Re: [Xen-devel] [PATCH] libxc: succeed silently on restore"):
> I'm not so sure what can be done about this case, the way
> xc_domain_restore is (currently) designed it relies on the saving end to
> close its FD when it is done in order to generate an EOF at the receiver
> end to signal the end of the migration.

This was introduced in the Remus patches and is IMO not correct.

> The xl migration protocol has a postamble which prevents us from closing
> the FD and so instead what happens is that the sender finishes the save
> and then sits waiting for the ACK from the receiver so the receiver hits
> the remus heartbeat timeout which causes us to continue. This isn't
> ideal from the downtime point of view nor from just a general design
> POV.

The xl migration protocol postamble is needed to try to mitigate the
consequences of network failure, where otherwise it is easy to get
into situations where neither the sender nor the receiver can safely
resume the domain.

> Perhaps we should insert an explicit done marker into the xc save
> protocol which would be appended in the non-checkpoint case? Only the
> save end is aware if the migration is a checkpoint or not (and only
> implicitly via callbacks->checkpoint <> NULL) but that is OK, I think.

There _is_ an explicit done marker: the sender stops sending pages and
sends a register dump.  It's just that remus then wants to continue
anyway.

The solution is that the interface to xc_domain_restore should be
extended so that:
 * Callers specify whether they are expecting a series of checkpoints,
   or just one.
 * When it returns you find out whether the response was "we got
   exactly the one checkpoint you were expecting" or "the network
   connection failed too soon" or "we got some checkpoints and then
   the network connection failed".
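
One illustrative shape for such an interface (a sketch only: the names,
the outcome values and the reduced parameter list are invented here and
are not the actual libxc signature):

/* Sketch only: every name below is invented for illustration. */
enum sketch_restore_result {
    SKETCH_RESTORE_ONE_CHECKPOINT,   /* got exactly the one expected      */
    SKETCH_RESTORE_FAILED_TOO_SOON,  /* link failed before any checkpoint */
    SKETCH_RESTORE_SOME_THEN_FAILED  /* >= 1 checkpoint, then link failed */
};

/*
 * The caller states up front whether it expects a stream of checkpoints
 * (checkpointed_stream != 0) or just one, and learns on return which of
 * the outcomes above actually happened.
 */
int sketch_domain_restore(int io_fd, unsigned int dom,
                          int checkpointed_stream,
                          enum sketch_restore_result *result);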

A related problem is that it is very difficult for the caller to
determine when the replication has been properly set up: ie, to know
when the receiver has got at least one whole checkpoint.

Ian.


* Re: [PATCH] libxc: succeed silently on restore
  2010-09-02 16:14 [PATCH] libxc: succeed silently on restore Ian Campbell
  2010-09-02 17:01 ` Ian Campbell
@ 2010-09-02 17:14 ` Ian Jackson
  1 sibling, 0 replies; 9+ messages in thread
From: Ian Jackson @ 2010-09-02 17:14 UTC (permalink / raw)
  To: Ian Campbell; +Cc: Brendan Cully, xen-devel

Ian Campbell writes ("[Xen-devel] [PATCH] libxc: succeed silently on restore"):
> We allow for an EOF precisely at the start of a new batch only when
> the restore is already marked as completed. In this case rdexact will
> fail silently and propagate the specific failure upwards so in order
> for higher levels to remain silent as well.

That seems correct to me.  I'll wait for comments from Brendan though.

Ian.


* Re: [PATCH] libxc: succeed silently on restore
  2010-09-02 17:01 ` Ian Campbell
  2010-09-02 17:07   ` Ian Jackson
@ 2010-09-02 18:16   ` Brendan Cully
  2010-09-02 18:29     ` Ian Campbell
  1 sibling, 1 reply; 9+ messages in thread
From: Brendan Cully @ 2010-09-02 18:16 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel, Ian Jackson

On Thursday, 02 September 2010 at 18:01, Ian Campbell wrote:
> So it turns out that there is a similar issue on migration:
>         xc: Saving memory: iter 3 (last sent 37 skipped 0): 0/32768    0%xc: error: rdexact failed (select returned 0): Internal error
>         xc: error: Error when reading batch size (110 = Connection timed out): Internal error
>         xc: error: error when buffering batch, finishing (110 = Connection timed out): Internal error
> 
> I'm not so sure what can be done about this case, the way
> xc_domain_restore is (currently) designed it relies on the saving end to
> close its FD when it is done in order to generate an EOF at the receiver
> end to signal the end of the migration.
> 
> The xl migration protocol has a postamble which prevents us from closing
> the FD and so instead what happens is that the sender finishes the save
> and then sits waiting for the ACK from the receiver so the receiver hits
> the remus heartbeat timeout which causes us to continue. This isn't
> ideal from the downtime point of view nor from just a general design
> POV.
> 
> Perhaps we should insert an explicit done marker into the xc save
> protocol which would be appended in the non-checkpoint case? Only the
> save end is aware if the migration is a checkpoint or not (and only
> implicitly via callbacks->checkpoint <> NULL) but that is OK, I think.

I think this can be done trivially? We can just add another negative
length record at the end of memory copying (like the debug flag, tmem,
hvm extensions, etc) if we're running the new xl migration protocol
and expect restore to exit after receiving the first full
checkpoint. Or, if you're not as worried about preserving the existing
semantics, make the minus flag indicate that callbacks->checkpoint is
not null, and only continue reading past the first complete checkpoint
if you see that minus flag on the receive side.

Isn't that sufficient?
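
To make that concrete, a sketch of the sending side emitting such a
record after the final checkpoint; the flag value and helper are
invented for illustration (matching the receive-side sketch earlier in
the thread) and are not real save-format IDs:

#include <unistd.h>

/* Sketch only: a made-up negative-length flag, not a real save-format ID. */
#define SKETCH_LAST_CHECKPOINT (-1000)

/*
 * Would be emitted by the saving side after the last complete checkpoint,
 * but only when no further checkpoints will follow (i.e. when
 * callbacks->checkpoint is NULL rather than a Remus-style loop).
 */
static int sketch_send_last_checkpoint(int io_fd)
{
    int marker = SKETCH_LAST_CHECKPOINT;

    if ( write(io_fd, &marker, sizeof(marker)) != (ssize_t)sizeof(marker) )
        return -1;
    return 0;
}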


* Re: [PATCH] libxc: succeed silently on restore
  2010-09-02 17:07   ` Ian Jackson
@ 2010-09-02 18:26     ` Ian Campbell
  2010-09-03 10:30       ` Ian Jackson
  0 siblings, 1 reply; 9+ messages in thread
From: Ian Campbell @ 2010-09-02 18:26 UTC (permalink / raw)
  To: Ian Jackson; +Cc: Brendan Cully, xen-devel

On Thu, 2010-09-02 at 18:07 +0100, Ian Jackson wrote:
> Ian Campbell writes ("Re: [Xen-devel] [PATCH] libxc: succeed silently on restore"):
> > I'm not so sure what can be done about this case, the way
> > xc_domain_restore is (currently) designed it relies on the saving end to
> > close its FD when it is done in order to generate an EOF at the receiver
> > end to signal the end of the migration.
> 
> This was introduced in the Remus patches and is IMO not correct.
> 
> > The xl migration protocol has a postamble which prevents us from closing
> > the FD and so instead what happens is that the sender finishes the save
> > and then sits waiting for the ACK from the receiver so the receiver hits
> > the remus heartbeat timeout which causes us to continue. This isn't
> > ideal from the downtime point of view nor from just a general design
> > POV.
> 
> The xl migration protocol postamble is needed to try to mitigate the
> consequences of network failure, where otherwise it is easy to get
> into situations where neither the sender nor the receiver can safely
> resume the domain.

Yes, I wasn't suggesting getting rid of the postamble, just commenting
on why we can't simply close the sending fd as xc_domain_restore
currently expects.

> > Perhaps we should insert an explicit done marker into the xc save
> > protocol which would be appended in the non-checkpoint case? Only the
> > save end is aware if the migration is a checkpoint or not (and only
> > implicitly via callbacks->checkpoint <> NULL) but that is OK, I think.
> 
> There _is_ an explicit done marker: the sender stops sending pages and
> sends a register dump.  It's just that remus then wants to continue
> anyway.

I was suggesting a second "alldone" marker to be sent after the register
dump and other tail bits when there are no more checkpoints to come.
But...

> The solution is that the interface to xc_domain_restore should be
> extended so that:
>  * Callers specify whether they are expecting a series of checkpoints,
>    or just one.
>  * When it returns you find out whether the response was "we got
>    exactly the one checkpoint you were expecting" or "the network
>    connection failed too soon" or "we got some checkpoints and then
>    the network connection failed".

... I like this idea more. I'll see what I can rustle up.

> A related problem is that it is very difficult for the caller to
> determine when the replication has been properly set up: ie, to know
> when the receiver has got at least one whole checkpoint.

I think this actually does work with the code as it is -- the receiver
will return an error if it doesn't get at least one whole checkpoint,
and will return success and commit to the most recent complete
checkpoint otherwise.

Ian.


* Re: [PATCH] libxc: succeed silently on restore
  2010-09-02 18:16   ` Brendan Cully
@ 2010-09-02 18:29     ` Ian Campbell
  2010-09-02 18:39       ` Brendan Cully
  0 siblings, 1 reply; 9+ messages in thread
From: Ian Campbell @ 2010-09-02 18:29 UTC (permalink / raw)
  To: Brendan Cully; +Cc: xen-devel, Ian Jackson

On Thu, 2010-09-02 at 19:16 +0100, Brendan Cully wrote:
> On Thursday, 02 September 2010 at 18:01, Ian Campbell wrote:
> > So it turns out that there is a similar issue on migration:
> >         xc: Saving memory: iter 3 (last sent 37 skipped 0): 0/32768    0%xc: error: rdexact failed (select returned 0): Internal error
> >         xc: error: Error when reading batch size (110 = Connection timed out): Internal error
> >         xc: error: error when buffering batch, finishing (110 = Connection timed out): Internal error
> > 
> > I'm not so sure what can be done about this case, the way
> > xc_domain_restore is (currently) designed it relies on the saving end to
> > close its FD when it is done in order to generate an EOF at the receiver
> > end to signal the end of the migration.
> > 
> > The xl migration protocol has a postamble which prevents us from closing
> > the FD and so instead what happens is that the sender finishes the save
> > and then sits waiting for the ACK from the receiver so the receiver hits
> > the remus heartbeat timeout which causes us to continue. This isn't
> > ideal from the downtime point of view nor from just a general design
> > POV.
> > 
> > Perhaps we should insert an explicit done marker into the xc save
> > protocol which would be appended in the non-checkpoint case? Only the
> > save end is aware if the migration is a checkpoint or not (and only
> > implicitly via callbacks->checkpoint <> NULL) but that is OK, I think.
> 
> I think this can be done trivially? We can just add another negative
> length record at the end of memory copying (like the debug flag, tmem,
> hvm extensions, etc) if we're running the new xl migration protocol
> and expect restore to exit after receiving the first full
> checkpoint. Or, if you're not as worried about preserving the existing
> semantics, make the minus flag indicate that callbacks->checkpoint is
> not null, and only continue reading past the first complete checkpoint
> if you see that minus flag on the receive side.
> 
> Isn't that sufficient?

It would probably work but isn't there a benefit to having the receiver
know that it is partaking in a multiple checkpoint restore and being
told how many iterations there were etc?

Ian.


* Re: [PATCH] libxc: succeed silently on restore
  2010-09-02 18:29     ` Ian Campbell
@ 2010-09-02 18:39       ` Brendan Cully
  0 siblings, 0 replies; 9+ messages in thread
From: Brendan Cully @ 2010-09-02 18:39 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel, Ian Jackson

On Thursday, 02 September 2010 at 19:29, Ian Campbell wrote:
> On Thu, 2010-09-02 at 19:16 +0100, Brendan Cully wrote:
> > On Thursday, 02 September 2010 at 18:01, Ian Campbell wrote:
> > > So it turns out that there is a similar issue on migration:
> > >         xc: Saving memory: iter 3 (last sent 37 skipped 0): 0/32768    0%xc: error: rdexact failed (select returned 0): Internal error
> > >         xc: error: Error when reading batch size (110 = Connection timed out): Internal error
> > >         xc: error: error when buffering batch, finishing (110 = Connection timed out): Internal error
> > > 
> > > I'm not so sure what can be done about this case, the way
> > > xc_domain_restore is (currently) designed it relies on the saving end to
> > > close its FD when it is done in order to generate an EOF at the receiver
> > > end to signal the end of the migration.
> > > 
> > > The xl migration protocol has a postamble which prevents us from closing
> > > the FD and so instead what happens is that the sender finishes the save
> > > and then sits waiting for the ACK from the receiver so the receiver hits
> > > the remus heartbeat timeout which causes us to continue. This isn't
> > > ideal from the downtime point of view nor from just a general design
> > > POV.
> > > 
> > > Perhaps we should insert an explicit done marker into the xc save
> > > protocol which would be appended in the non-checkpoint case? Only the
> > > save end is aware if the migration is a checkpoint or not (and only
> > > implicitly via callbacks->checkpoint <> NULL) but that is OK, I think.
> > 
> > I think this can be done trivially? We can just add another negative
> > length record at the end of memory copying (like the debug flag, tmem,
> > hvm extensions, etc) if we're running the new xl migration protocol
> > and expect restore to exit after receiving the first full
> > checkpoint. Or, if you're not as worried about preserving the existing
> > semantics, make the minus flag indicate that callbacks->checkpoint is
> > not null, and only continue reading past the first complete checkpoint
> > if you see that minus flag on the receive side.
> > 
> > Isn't that sufficient?
> 
> It would probably work but isn't there a benefit to having the receiver
> know that it is partaking in a multiple checkpoint restore and being
> told how many iterations there were etc?

Is there?

The minusflag does tell the receiver that it is participating in a
multiple checkpoint restore (when it receives the flag). I can't
really see a reason why the sender should want to tell the receiver to
expect n checkpoints (as opposed to 1 or continuous). I suppose it
would be nice if the sender could gracefully abort a continual
checkpoint process without causing the receiver to activate the
VM. Yet another minusflag? :)

I have no objection to more aggressive refactoring (the current
protocol and code are gross); I'm just noting that this particular
problem also has an easy fix.


* Re: [PATCH] libxc: succeed silently on restore
  2010-09-02 18:26     ` Ian Campbell
@ 2010-09-03 10:30       ` Ian Jackson
  0 siblings, 0 replies; 9+ messages in thread
From: Ian Jackson @ 2010-09-03 10:30 UTC (permalink / raw)
  To: Ian Campbell; +Cc: Brendan Cully, xen-devel

Ian Campbell writes ("Re: [Xen-devel] [PATCH] libxc: succeed silently on restore"):
> On Thu, 2010-09-02 at 18:07 +0100, Ian Jackson wrote:
> > A related problem is that it is very difficult for the caller to
> > determine when the replication has been properly set up: ie, to know
> > when the receiver has got at least one whole checkpoint.
> 
> I think this actually does work with the code as it is -- the receive
> will return error if it doesn't get at least one whole checkpoint and
> will return success and commit to the most recent complete checkpoint
> otherwise.

Except that the caller doesn't get either of these indications until
the replication session eventually fails.

Ian.

