All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: qemu-devel@nongnu.org
Cc: Anthony Liguori <aliguori@us.ibm.com>,
	aik@ozlabs.ru, Alexander Graf <agraf@suse.de>,
	qemu-ppc@nongnu.org, Paolo Bonzini <pbonzini@redhat.com>,
	Paul Mackerras <paulus@samba.org>,
	David Gibson <david@gibson.dropbear.id.au>
Subject: [Qemu-devel] [PATCH 15/17] pseries: savevm support with KVM
Date: Thu, 27 Jun 2013 16:45:58 +1000	[thread overview]
Message-ID: <1372315560-5478-16-git-send-email-aik@ozlabs.ru> (raw)
In-Reply-To: <1372315560-5478-1-git-send-email-aik@ozlabs.ru>

From: David Gibson <david@gibson.dropbear.id.au>

At present, the savevm / migration support for the pseries machine will not
work when KVM is enabled.  That's because KVM manages the guest's hash page
table in the host kernel, so qemu has no visibility of it.  This patch
fixes this by using new kernel interfaces to extract and reinsert the
guest's hash table during the migration process.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 hw/ppc/spapr.c         |  106 ++++++++++++++++++++++++++++++++++++++----------
 include/hw/ppc/spapr.h |    1 +
 target-ppc/kvm.c       |   69 +++++++++++++++++++++++++++++++
 target-ppc/kvm_ppc.h   |   22 ++++++++++
 4 files changed, 176 insertions(+), 22 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 211f434..9489edc 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -744,17 +744,27 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
 {
     sPAPREnvironment *spapr = opaque;
 
-    spapr->htab_save_index = 0;
-    spapr->htab_first_pass = true;
-
     /* "Iteration" header */
     qemu_put_be32(f, spapr->htab_shift);
 
+    if (spapr->htab) {
+        spapr->htab_save_index = 0;
+        spapr->htab_first_pass = true;
+    } else {
+        assert(kvm_enabled());
+
+        spapr->htab_fd = kvmppc_get_htab_fd(false);
+        if (spapr->htab_fd < 0) {
+            fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
+                    strerror(errno));
+            return -1;
+        }
+    }
+
+
     return 0;
 }
 
-#define MAX_ITERATION_NS    5000000 /* 5 ms */
-
 static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
                                  int64_t max_ns)
 {
@@ -805,8 +815,8 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
     spapr->htab_save_index = index;
 }
 
-static bool htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
-                                 int64_t max_ns)
+static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
+                                int64_t max_ns)
 {
     bool final = max_ns < 0;
     int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
@@ -879,21 +889,32 @@ static bool htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
 
     spapr->htab_save_index = index;
 
-    return (examined >= htabslots) && (sent == 0);
+    return (examined >= htabslots) && (sent == 0) ? 1 : 0;
 }
 
+#define MAX_ITERATION_NS    5000000 /* 5 ms */
+#define MAX_KVM_BUF_SIZE    2048
+
 static int htab_save_iterate(QEMUFile *f, void *opaque)
 {
     sPAPREnvironment *spapr = opaque;
-    bool nothingleft = false;;
+    int rc = 0;
 
     /* Iteration header */
     qemu_put_be32(f, 0);
 
-    if (spapr->htab_first_pass) {
+    if (!spapr->htab) {
+        assert(kvm_enabled());
+
+        rc = kvmppc_save_htab(f, spapr->htab_fd,
+                              MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
+        if (rc < 0) {
+            return rc;
+        }
+    } else  if (spapr->htab_first_pass) {
         htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
     } else {
-        nothingleft = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
+        rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
     }
 
     /* End marker */
@@ -901,7 +922,7 @@ static int htab_save_iterate(QEMUFile *f, void *opaque)
     qemu_put_be16(f, 0);
     qemu_put_be16(f, 0);
 
-    return nothingleft ? 1 : 0;
+    return rc;
 }
 
 static int htab_save_complete(QEMUFile *f, void *opaque)
@@ -911,7 +932,20 @@ static int htab_save_complete(QEMUFile *f, void *opaque)
     /* Iteration header */
     qemu_put_be32(f, 0);
 
-    htab_save_later_pass(f, spapr, -1);
+    if (!spapr->htab) {
+        int rc;
+
+        assert(kvm_enabled());
+
+        rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
+        if (rc < 0) {
+            return rc;
+        }
+        close(spapr->htab_fd);
+        spapr->htab_fd = -1;
+    } else {
+        htab_save_later_pass(f, spapr, -1);
+    }
 
     /* End marker */
     qemu_put_be32(f, 0);
@@ -925,6 +959,7 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
 {
     sPAPREnvironment *spapr = opaque;
     uint32_t section_hdr;
+    int fd = -1;
 
     if (version_id < 1 || version_id > 1) {
         fprintf(stderr, "htab_load() bad version\n");
@@ -941,6 +976,16 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
         return 0;
     }
 
+    if (!spapr->htab) {
+        assert(kvm_enabled());
+
+        fd = kvmppc_get_htab_fd(true);
+        if (fd < 0) {
+            fprintf(stderr, "Unable to open fd to restore KVM hash table: %s\n",
+                    strerror(errno));
+        }
+    }
+
     while (true) {
         uint32_t index;
         uint16_t n_valid, n_invalid;
@@ -954,24 +999,41 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
             break;
         }
 
-        if ((index + n_valid + n_invalid) >=
+        if ((index + n_valid + n_invalid) >
             (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
             /* Bad index in stream */
             fprintf(stderr, "htab_load() bad index %d (%hd+%hd entries) "
-                    "in htab stream\n", index, n_valid, n_invalid);
+                    "in htab stream (htab_shift=%d)\n", index, n_valid, n_invalid,
+                    spapr->htab_shift);
             return -EINVAL;
         }
 
-        if (n_valid) {
-            qemu_get_buffer(f, HPTE(spapr->htab, index),
-                            HASH_PTE_SIZE_64 * n_valid);
-        }
-        if (n_invalid) {
-            memset(HPTE(spapr->htab, index + n_valid), 0,
-                   HASH_PTE_SIZE_64 * n_invalid);
+        if (spapr->htab) {
+            if (n_valid) {
+                qemu_get_buffer(f, HPTE(spapr->htab, index),
+                                HASH_PTE_SIZE_64 * n_valid);
+            }
+            if (n_invalid) {
+                memset(HPTE(spapr->htab, index + n_valid), 0,
+                       HASH_PTE_SIZE_64 * n_invalid);
+            }
+        } else {
+            int rc;
+
+            assert(fd >= 0);
+
+            rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
+            if (rc < 0) {
+                return rc;
+            }
         }
     }
 
+    if (!spapr->htab) {
+        assert(fd >= 0);
+        close(fd);
+    }
+
     return 0;
 }
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 4cfe449..3da31f0 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -37,6 +37,7 @@ typedef struct sPAPREnvironment {
     /* Migration state */
     int htab_save_index;
     bool htab_first_pass;
+    int htab_fd;
 } sPAPREnvironment;
 
 #define H_SUCCESS         0
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 33ddf63..ff85c19 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -65,6 +65,7 @@ static int cap_one_reg;
 static int cap_epr;
 static int cap_ppc_watchdog;
 static int cap_papr;
+static int cap_htab_fd;
 
 /* XXX We have a race condition where we actually have a level triggered
  *     interrupt, but the infrastructure can't expose that yet, so the guest
@@ -101,6 +102,7 @@ int kvm_arch_init(KVMState *s)
     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
     /* Note: we don't set cap_papr here, because this capability is
      * only activated after this by kvmppc_set_papr() */
+    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 
     if (!cap_interrupt_level) {
         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
@@ -1802,6 +1804,73 @@ int kvmppc_define_rtas_token(uint32_t token, const char *function)
     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
 }
 
+int kvmppc_get_htab_fd(bool write)
+{
+    struct kvm_get_htab_fd s = {
+        .flags = write ? KVM_GET_HTAB_WRITE : 0,
+        .start_index = 0,
+    };
+
+    if (!cap_htab_fd) {
+        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
+        return -1;
+    }
+
+    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
+}
+
+int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
+{
+    int64_t starttime = qemu_get_clock_ns(rt_clock);
+    uint8_t buf[bufsize];
+    ssize_t rc;
+
+    do {
+        rc = read(fd, buf, bufsize);
+        if (rc < 0) {
+            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
+                    strerror(errno));
+            return rc;
+        } else if (rc) {
+            /* Kernel already retuns data in BE format for the file */
+            qemu_put_buffer(f, buf, rc);
+        }
+    } while ((rc != 0)
+             && ((max_ns < 0)
+                 || ((qemu_get_clock_ns(rt_clock) - starttime) < max_ns)));
+
+    return (rc == 0) ? 1 : 0;
+}
+
+int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
+                           uint16_t n_valid, uint16_t n_invalid)
+{
+    struct kvm_get_htab_header *buf;
+    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
+    ssize_t rc;
+
+    buf = alloca(chunksize);
+    /* This is KVM on ppc, so this is all big-endian */
+    buf->index = index;
+    buf->n_valid = n_valid;
+    buf->n_invalid = n_invalid;
+
+    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
+
+    rc = write(fd, buf, chunksize);
+    if (rc < 0) {
+        fprintf(stderr, "Error writing KVM hash table: %s\n",
+                strerror(errno));
+        return rc;
+    }
+    if (rc != chunksize) {
+        /* We should never get a short write on a single chunk */
+        fprintf(stderr, "Short write, restoring KVM hash table\n");
+        return -1;
+    }
+    return 0;
+}
+
 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
 {
     return true;
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 21939a8..12564ef 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -39,6 +39,10 @@ uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
 int kvmppc_fixup_cpu(PowerPCCPU *cpu);
 bool kvmppc_has_cap_epr(void);
 int kvmppc_define_rtas_token(uint32_t token, const char *function);
+int kvmppc_get_htab_fd(bool write);
+int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
+int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
+                           uint16_t n_valid, uint16_t n_invalid);
 
 #else
 
@@ -166,6 +170,24 @@ static inline int kvmppc_define_rtas_token(uint32_t token,
 {
     return -1;
 }
+
+static inline int kvmppc_get_htab_fd(bool write)
+{
+    return -1;
+}
+
+static inline int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize,
+                                   int64_t max_ns)
+{
+    abort();
+}
+
+static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
+                                         uint16_t n_valid, uint16_t n_invalid)
+{
+    abort();
+}
+
 #endif
 
 #ifndef CONFIG_KVM
-- 
1.7.10.4

  parent reply	other threads:[~2013-06-27  6:47 UTC|newest]

Thread overview: 92+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-06-27  6:45 [Qemu-devel] [PATCH 00/17 v3] spapr: migration, pci, msi, power8 Alexey Kardashevskiy
2013-06-27  6:45 ` [Qemu-devel] [PATCH 01/17] pseries: move interrupt controllers to hw/intc/ Alexey Kardashevskiy
2013-07-02 20:54   ` Andreas Färber
2013-07-08 18:15   ` Anthony Liguori
2013-07-08 18:34     ` Alexander Graf
2013-06-27  6:45 ` [Qemu-devel] [PATCH 02/17] pseries: rework XICS Alexey Kardashevskiy
2013-06-27 11:47   ` David Gibson
2013-06-27 12:17     ` Alexey Kardashevskiy
2013-07-02  0:06       ` David Gibson
2013-07-02  0:21         ` Alexander Graf
2013-07-02  2:08           ` Alexey Kardashevskiy
2013-07-08 18:24       ` Anthony Liguori
2013-07-08 18:22   ` Anthony Liguori
2013-07-09  3:40     ` Alexey Kardashevskiy
2013-07-09  4:48       ` Benjamin Herrenschmidt
2013-07-09 13:58         ` Anthony Liguori
2013-07-10  3:06           ` Alexey Kardashevskiy
2013-07-10  3:26           ` Benjamin Herrenschmidt
2013-07-10 12:09             ` Anthony Liguori
2013-06-27  6:45 ` [Qemu-devel] [PATCH 03/17] savevm: Implement VMS_DIVIDE flag Alexey Kardashevskiy
2013-07-08 18:27   ` Anthony Liguori
2013-07-08 23:57     ` David Gibson
2013-07-09 14:06       ` Anthony Liguori
2013-07-09 14:38         ` David Gibson
2013-06-27  6:45 ` [Qemu-devel] [PATCH 04/17] target-ppc: Convert ppc cpu savevm to VMStateDescription Alexey Kardashevskiy
2013-07-08 18:29   ` Anthony Liguori
2013-07-09  5:14     ` Alexey Kardashevskiy
2013-07-09 14:08       ` Anthony Liguori
2013-07-09 15:11         ` David Gibson
2013-07-10  3:31           ` Benjamin Herrenschmidt
2013-07-10  7:49             ` David Gibson
2013-07-15 13:24           ` Paolo Bonzini
2013-06-27  6:45 ` [Qemu-devel] [PATCH 05/17] pseries: savevm support for XICS interrupt controller Alexey Kardashevskiy
2013-07-08 18:31   ` Anthony Liguori
2013-07-09  0:06     ` Alexey Kardashevskiy
2013-07-09  0:49       ` Anthony Liguori
2013-07-09  0:59         ` Alexey Kardashevskiy
2013-07-09  1:25           ` Anthony Liguori
2013-07-09  3:37         ` Alexey Kardashevskiy
2013-07-15 13:05           ` Paolo Bonzini
2013-07-15 13:13             ` Alexey Kardashevskiy
2013-07-15 13:17               ` Paolo Bonzini
2013-07-09  7:17     ` David Gibson
2013-07-15 13:10       ` Paolo Bonzini
2013-06-27  6:45 ` [Qemu-devel] [PATCH 06/17] pseries: savevm support for VIO devices Alexey Kardashevskiy
2013-07-08 18:35   ` Anthony Liguori
2013-06-27  6:45 ` [Qemu-devel] [PATCH 07/17] pseries: savevm support for PAPR VIO logical lan Alexey Kardashevskiy
2013-07-08 18:36   ` Anthony Liguori
2013-06-27  6:45 ` [Qemu-devel] [PATCH 08/17] pseries: savevm support for PAPR TCE tables Alexey Kardashevskiy
2013-07-08 18:39   ` Anthony Liguori
2013-07-08 21:45     ` Benjamin Herrenschmidt
2013-07-08 22:15       ` Anthony Liguori
2013-07-08 22:41         ` Benjamin Herrenschmidt
2013-07-09  7:20     ` David Gibson
2013-07-09 15:22       ` Anthony Liguori
2013-07-10  7:42         ` David Gibson
2013-07-09 16:26       ` Anthony Liguori
2013-07-15 13:26     ` Paolo Bonzini
2013-07-15 15:06       ` Anthony Liguori
2013-06-27  6:45 ` [Qemu-devel] [PATCH 09/17] pseries: rework PAPR virtual SCSI Alexey Kardashevskiy
2013-07-08 18:42   ` Anthony Liguori
2013-07-15 13:11     ` Paolo Bonzini
2013-06-27  6:45 ` [Qemu-devel] [PATCH 10/17] pseries: savevm support for " Alexey Kardashevskiy
2013-06-27  6:45 ` [Qemu-devel] [PATCH 11/17] pseries: savevm support for pseries machine Alexey Kardashevskiy
2013-07-08 18:45   ` Anthony Liguori
2013-07-08 18:50     ` Alexander Graf
2013-07-08 19:01       ` Anthony Liguori
2013-07-08 21:48     ` Benjamin Herrenschmidt
2013-07-08 22:23       ` Anthony Liguori
2013-06-27  6:45 ` [Qemu-devel] [PATCH 12/17] pseries: savevm support for PCI host bridge Alexey Kardashevskiy
2013-07-08 18:45   ` Anthony Liguori
2013-06-27  6:45 ` [Qemu-devel] [PATCH 13/17] target-ppc: Add helper for KVM_PPC_RTAS_DEFINE_TOKEN Alexey Kardashevskiy
2013-06-27  6:45 ` [Qemu-devel] [PATCH 14/17] pseries: Support for in-kernel XICS interrupt controller Alexey Kardashevskiy
2013-07-08 18:50   ` Anthony Liguori
2013-07-09  3:21     ` Alexey Kardashevskiy
2013-07-09  7:21       ` David Gibson
2013-07-10  3:24         ` Benjamin Herrenschmidt
2013-07-10  7:48           ` David Gibson
2013-06-27  6:45 ` Alexey Kardashevskiy [this message]
2013-06-27  6:45 ` [Qemu-devel] [PATCH 16/17] ppc64: Enable QEMU to run on POWER 8 DD1 chip Alexey Kardashevskiy
2013-07-04  5:54   ` Andreas Färber
2013-07-04  6:26     ` [Qemu-devel] [Qemu-ppc] " Benjamin Herrenschmidt
2013-07-04  6:42     ` [Qemu-devel] " Prerna Saxena
2013-07-10 11:19       ` Alexander Graf
2013-06-27  6:46 ` [Qemu-devel] [PATCH 17/17] spapr-pci: rework MSI/MSIX Alexey Kardashevskiy
2013-07-04  2:31 ` [Qemu-devel] [PATCH 00/17 v3] spapr: migration, pci, msi, power8 Alexey Kardashevskiy
2013-07-04  2:40   ` Anthony Liguori
2013-07-04  2:48     ` Alexey Kardashevskiy
2013-07-08 18:01 ` Anthony Liguori
2013-07-09  6:37   ` Alexey Kardashevskiy
2013-07-09 15:26     ` Anthony Liguori
2013-07-09 14:04 ` Anthony Liguori

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1372315560-5478-16-git-send-email-aik@ozlabs.ru \
    --to=aik@ozlabs.ru \
    --cc=agraf@suse.de \
    --cc=aliguori@us.ibm.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=paulus@samba.org \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.