All of lore.kernel.org
 help / color / mirror / Atom feed
From: Steve Sistare <steven.sistare@oracle.com>
To: qemu-devel@nongnu.org
Cc: "Paolo Bonzini" <pbonzini@redhat.com>,
	"Stefan Hajnoczi" <stefanha@redhat.com>,
	"Marc-André Lureau" <marcandre.lureau@redhat.com>,
	"Alex Bennée" <alex.bennee@linaro.org>,
	"Dr. David Alan Gilbert" <dgilbert@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"Marcel Apfelbaum" <marcel.apfelbaum@gmail.com>,
	"Alex Williamson" <alex.williamson@redhat.com>,
	"Daniel P. Berrange" <berrange@redhat.com>,
	"Juan Quintela" <quintela@redhat.com>,
	"Markus Armbruster" <armbru@redhat.com>,
	"Eric Blake" <eblake@redhat.com>,
	"Jason Zeng" <jason.zeng@linux.intel.com>,
	"Zheng Chuan" <zhengchuan@huawei.com>,
	"Steve Sistare" <steven.sistare@oracle.com>,
	"Mark Kanda" <mark.kanda@oracle.com>,
	"Guoyi Tu" <tugy@chinatelecom.cn>,
	"Peter Maydell" <peter.maydell@linaro.org>,
	"Philippe Mathieu-Daudé" <philippe.mathieu.daude@gmail.com>,
	"Igor Mammedov" <imammedo@redhat.com>,
	"David Hildenbrand" <david@redhat.com>,
	"John Snow" <jsnow@redhat.com>, "Peng Liang" <tcx4c70@gmail.com>
Subject: [PATCH V9 34/46] vfio-pci: recover from unmap-all-vaddr failure
Date: Tue, 26 Jul 2022 09:10:31 -0700	[thread overview]
Message-ID: <1658851843-236870-35-git-send-email-steven.sistare@oracle.com> (raw)
In-Reply-To: <1658851843-236870-1-git-send-email-steven.sistare@oracle.com>

If there are multiple containers and unmap-all fails for some container, we
need to remap vaddr for the other containers for which unmap-all succeeded.
Recover by walking all flat sections of all containers to restore the vaddr
for each.  Do so by invoking the vfio listener callback, and passing a new
"remap" flag that tells it to restore a mapping without re-allocating new
userland data structures.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 hw/vfio/common.c              | 79 ++++++++++++++++++++++++++++++++++---------
 hw/vfio/cpr.c                 | 36 ++++++++++++++++++++
 include/hw/vfio/vfio-common.h |  2 +-
 3 files changed, 100 insertions(+), 17 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b7b35d4..de821e1 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -895,15 +895,35 @@ static bool vfio_known_safe_misalignment(MemoryRegionSection *section)
     return true;
 }
 
+/* Find the RAM discard listener that @container registered for @section. */
+static VFIORamDiscardListener *vfio_find_ram_discard_listener(
+    VFIOContainer *container, MemoryRegionSection *section)
+{
+    hwaddr as_offset = section->offset_within_address_space;
+    VFIORamDiscardListener *cur;
+
+    QLIST_FOREACH(cur, &container->vrdl_list, next) {
+        /* A listener is identified by its region and address-space offset */
+        if (cur->mr == section->mr &&
+            cur->offset_within_address_space == as_offset) {
+            return cur;
+        }
+    }
+
+    /* No listener matches @section: report a fatal error. */
+    hw_error("vfio: Trying to sync missing RAM discard listener");
+    return NULL; /* not reached: hw_error does not return */
+}
+
 static void vfio_listener_region_add(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
     VFIOContainer *container = container_of(listener, VFIOContainer, listener);
-    vfio_container_region_add(container, section);
+    vfio_container_region_add(container, section, false);
 }
 
 void vfio_container_region_add(VFIOContainer *container,
-                               MemoryRegionSection *section)
+                               MemoryRegionSection *section, bool remap)
 {
     hwaddr iova, end;
     Int128 llend, llsize;
@@ -1033,6 +1053,30 @@ void vfio_container_region_add(VFIOContainer *container,
         int iommu_idx;
 
         trace_vfio_listener_region_add_iommu(iova, end);
+
+        /*
+         * If remap, then a VFIO_DMA_UNMAP_FLAG_VADDR unmap has been issued,
+         * and we want to remap the vaddr.  vfio_container_region_add was
+         * already called in the past, so the giommu already exists.  Find it
+         * and replay it, which calls vfio_dma_map further down the stack.
+         */
+
+        if (remap) {
+            hwaddr as_offset = section->offset_within_address_space;
+            hwaddr iommu_offset = as_offset - section->offset_within_region;
+
+            QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
+                if (giommu->iommu_mr == iommu_mr &&
+                    giommu->iommu_offset == iommu_offset) {
+                    memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);
+                    return;
+                }
+            }
+            error_report("Container cannot find iommu region %s offset %"
+                HWADDR_PRIx, memory_region_name(section->mr), iommu_offset);
+            goto fail;
+        }
+
         /*
          * FIXME: For VFIO iommu types which have KVM acceleration to
          * avoid bouncing all map/unmaps through qemu this way, this
@@ -1083,7 +1127,21 @@ void vfio_container_region_add(VFIOContainer *container,
      * about changes.
      */
     if (memory_region_has_ram_discard_manager(section->mr)) {
-        vfio_register_ram_discard_listener(container, section);
+        /*
+         * If remap, then VFIO_DMA_UNMAP_FLAG_VADDR has been called, and we
+         * want to remap the vaddr.  vfio_container_region_add was already
+         * called in the past, so the ram discard listener already exists.
+         * Call its populate function directly, which calls vfio_dma_map.
+         */
+        if (remap)  {
+            VFIORamDiscardListener *vrdl =
+                vfio_find_ram_discard_listener(container, section);
+            if (vrdl->listener.notify_populate(&vrdl->listener, section)) {
+                error_report("listener.notify_populate failed");
+            }
+        } else {
+            vfio_register_ram_discard_listener(container, section);
+        }
         return;
     }
 
@@ -1417,19 +1475,8 @@ static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
                                                    MemoryRegionSection *section)
 {
     RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
-    VFIORamDiscardListener *vrdl = NULL;
-
-    QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
-        if (vrdl->mr == section->mr &&
-            vrdl->offset_within_address_space ==
-            section->offset_within_address_space) {
-            break;
-        }
-    }
-
-    if (!vrdl) {
-        hw_error("vfio: Trying to sync missing RAM discard listener");
-    }
+    VFIORamDiscardListener *vrdl =
+        vfio_find_ram_discard_listener(container, section);
 
     /*
      * We only want/can synchronize the bitmap for actually mapped parts -
diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index 83f7872..1f682cb 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -34,6 +34,15 @@ vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
     return 0;
 }
 
+static int
+vfio_region_remap(MemoryRegionSection *section, void *handle, Error **errp)
+{
+    VFIOContainer *c = handle;      /* the iterator's opaque handle */
+    vfio_container_region_add(c, section, true);    /* true: remap mode */
+    c->vaddr_unmapped = false;      /* flag read by vfio_cpr_fail_notifier */
+    return 0;                       /* always 0; errp is never set */
+}
+
 static bool vfio_is_cpr_capable(VFIOContainer *container, Error **errp)
 {
     if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR) ||
@@ -99,6 +108,30 @@ static const VMStateDescription vfio_container_vmstate = {
     }
 };
 
+/* Notifier: restore this container's vaddr after a failed cpr-exec. */
+static void vfio_cpr_fail_notifier(Notifier *notifier, void *data)
+{
+    VFIOContainer *container =
+        container_of(notifier, VFIOContainer, cpr_notifier);
+    MigrationState *ms = data;
+    Error *local_err = NULL;
+
+    if (!migration_has_failed(ms) ||
+        migrate_mode_of(ms) != MIG_MODE_CPR_EXEC ||
+        !container->vaddr_unmapped) {
+        return;
+    }
+
+    /* Set reused so vfio_dma_map restores vaddr */
+    container->reused = true;
+    if (address_space_flat_for_each_section(container->space->as,
+                                            vfio_region_remap,
+                                            container, &local_err)) {
+        error_report_err(local_err);
+    }
+    container->reused = false;
+}
+
 int vfio_cpr_register_container(VFIOContainer *container, Error **errp)
 {
     container->cpr_blocker = NULL;
@@ -109,6 +142,7 @@ int vfio_cpr_register_container(VFIOContainer *container, Error **errp)
 
     vmstate_register(NULL, -1, &vfio_container_vmstate, container);
 
+    migration_add_notifier(&container->cpr_notifier, vfio_cpr_fail_notifier);
     return 0;
 }
 
@@ -117,4 +151,6 @@ void vfio_cpr_unregister_container(VFIOContainer *container)
     migrate_del_blocker(&container->cpr_blocker);
 
     vmstate_unregister(NULL, &vfio_container_vmstate, container);
+
+    migration_remove_notifier(&container->cpr_notifier);
 }
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 17ad9ba..b682645 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -244,7 +244,7 @@ vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
 extern const MemoryListener vfio_prereg_listener;
 void vfio_listener_register(VFIOContainer *container);
 void vfio_container_region_add(VFIOContainer *container,
-                               MemoryRegionSection *section);
+                               MemoryRegionSection *section, bool remap);
 
 int vfio_spapr_create_window(VFIOContainer *container,
                              MemoryRegionSection *section,
-- 
1.8.3.1



  parent reply	other threads:[~2022-07-26 16:37 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-26 16:09 [PATCH V9 00/46] Live Update Steve Sistare
2022-07-26 16:09 ` [PATCH V9 01/46] migration: fix populate_vfio_info Steve Sistare
2022-07-26 16:09 ` [PATCH V9 02/46] memory: RAM_NAMED_FILE flag Steve Sistare
2022-07-26 16:10 ` [PATCH V9 03/46] migration: file URI Steve Sistare
2022-07-26 16:10 ` [PATCH V9 04/46] migration: mode parameter Steve Sistare
2022-07-26 16:10 ` [PATCH V9 05/46] migration: migrate-enable-mode option Steve Sistare
2022-07-26 16:10 ` [PATCH V9 06/46] migration: simplify blockers Steve Sistare
2022-07-26 16:10 ` [PATCH V9 07/46] migration: per-mode blockers Steve Sistare
2022-07-26 16:10 ` [PATCH V9 08/46] cpr: relax some blockers Steve Sistare
2022-07-26 16:10 ` [PATCH V9 09/46] cpr: reboot mode Steve Sistare
2022-07-26 16:10 ` [PATCH V9 10/46] qdev-properties: strList Steve Sistare
2023-06-08 14:50   ` Steven Sistare
2022-07-26 16:10 ` [PATCH V9 11/46] qapi: strList_from_string Steve Sistare
2022-07-26 16:10 ` [PATCH V9 12/46] qapi: QAPI_LIST_LENGTH Steve Sistare
2022-07-26 16:10 ` [PATCH V9 13/46] qapi: strv_from_strList Steve Sistare
2022-07-26 16:10 ` [PATCH V9 14/46] qapi: strList unit tests Steve Sistare
2022-07-26 16:10 ` [PATCH V9 15/46] migration: cpr-exec-args parameter Steve Sistare
2022-07-26 16:10 ` [PATCH V9 16/46] migration: simplify notifiers Steve Sistare
2022-07-26 16:10 ` [PATCH V9 17/46] migration: check mode in notifiers Steve Sistare
2022-07-26 16:10 ` [PATCH V9 18/46] memory: flat section iterator Steve Sistare
2022-07-26 16:10 ` [PATCH V9 19/46] oslib: qemu_clear_cloexec Steve Sistare
2022-07-26 16:10 ` [PATCH V9 20/46] vl: helper to request re-exec Steve Sistare
2022-07-26 16:10 ` [PATCH V9 21/46] cpr: preserve extra state Steve Sistare
2022-07-26 16:10 ` [PATCH V9 22/46] cpr: exec mode Steve Sistare
2022-07-26 16:10 ` [PATCH V9 23/46] cpr: add exec-mode blockers Steve Sistare
2022-07-26 16:10 ` [PATCH V9 24/46] cpr: ram block blockers Steve Sistare
2022-07-26 16:10 ` [PATCH V9 25/46] cpr: only-cpr-capable Steve Sistare
2022-07-26 16:10 ` [PATCH V9 26/46] cpr: Mismatched GPAs fix Steve Sistare
2022-07-26 16:10 ` [PATCH V9 27/46] hostmem-memfd: cpr support Steve Sistare
2022-07-26 16:10 ` [PATCH V9 28/46] hostmem-epc: " Steve Sistare
2022-07-26 16:10 ` [PATCH V9 29/46] pci: export msix_is_pending Steve Sistare
2022-07-26 16:10 ` [PATCH V9 30/46] vfio-pci: refactor for cpr Steve Sistare
2022-07-26 16:10 ` [PATCH V9 31/46] vfio-pci: cpr part 1 (fd and dma) Steve Sistare
2022-07-26 16:10 ` [PATCH V9 32/46] vfio-pci: cpr part 2 (msi) Steve Sistare
2023-07-05  8:56   ` Kunkun Jiang via
2023-07-10 15:43     ` Steven Sistare
2023-07-13 12:35       ` Kunkun Jiang via
2023-07-13 12:42         ` Marc Zyngier
2022-07-26 16:10 ` [PATCH V9 33/46] vfio-pci: cpr part 3 (intx) Steve Sistare
2022-07-26 16:10 ` Steve Sistare [this message]
2022-07-26 16:10 ` [PATCH V9 35/46] vhost: reset vhost devices for cpr Steve Sistare
2022-07-26 16:10 ` [PATCH V9 36/46] chardev: cpr framework Steve Sistare
2022-07-26 16:10 ` [PATCH V9 37/46] chardev: cpr for simple devices Steve Sistare
2022-07-26 16:10 ` [PATCH V9 38/46] chardev: cpr for pty Steve Sistare
2022-07-26 16:10 ` [PATCH V9 39/46] chardev: cpr for sockets Steve Sistare
2022-07-26 16:10 ` [PATCH V9 40/46] python/machine: QEMUMachine full_args Steve Sistare
2022-07-26 18:00   ` John Snow
2022-07-26 16:10 ` [PATCH V9 41/46] python/machine: QEMUMachine reopen_qmp_connection Steve Sistare
2022-07-26 18:04   ` John Snow
2022-07-27 12:06     ` Steven Sistare
2022-07-26 16:10 ` [PATCH V9 42/46] tests/avocado: add cpr regression test Steve Sistare
2023-12-01 10:44   ` Philippe Mathieu-Daudé
2022-07-26 16:10 ` [PATCH V9 43/46] vl: start on wakeup request Steve Sistare
2022-07-26 16:10 ` [PATCH V9 44/46] migration: fix suspended runstate Steve Sistare
2022-07-26 16:10 ` [PATCH V9 45/46] migration: notifier error reporting Steve Sistare
2022-07-26 16:10 ` [PATCH V9 46/46] vfio: allow cpr-reboot migration if suspended Steve Sistare
2022-12-07 15:48 ` [PATCH V9 00/46] Live Update Steven Sistare
2023-02-07 18:44   ` Steven Sistare
2023-02-07 19:01     ` Steven Sistare
2023-05-30 13:38     ` Philippe Mathieu-Daudé
2023-05-30 13:53       ` Steven Sistare
2023-04-07 17:35   ` Michael Galaxy
2023-04-14 19:20   ` Michael Galaxy
2023-06-06 22:15   ` Michael Galaxy
2023-06-07 15:55     ` Michael Galaxy
2023-06-07 17:37       ` Steven Sistare
2023-06-12 14:59         ` Michael Galaxy
2023-07-10 15:10           ` Steven Sistare
2023-07-13 15:53             ` Michael Galaxy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1658851843-236870-35-git-send-email-steven.sistare@oracle.com \
    --to=steven.sistare@oracle.com \
    --cc=alex.bennee@linaro.org \
    --cc=alex.williamson@redhat.com \
    --cc=armbru@redhat.com \
    --cc=berrange@redhat.com \
    --cc=david@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=eblake@redhat.com \
    --cc=imammedo@redhat.com \
    --cc=jason.zeng@linux.intel.com \
    --cc=jsnow@redhat.com \
    --cc=marcandre.lureau@redhat.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mark.kanda@oracle.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=philippe.mathieu.daude@gmail.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=tcx4c70@gmail.com \
    --cc=tugy@chinatelecom.cn \
    --cc=zhengchuan@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.