All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Hildenbrand <david@redhat.com>
To: qemu-devel@nongnu.org
Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>,
	David Hildenbrand <david@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
	Peter Xu <peterx@redhat.com>,
	Luiz Capitulino <lcapitulino@redhat.com>,
	Auger Eric <eric.auger@redhat.com>,
	Alex Williamson <alex.williamson@redhat.com>,
	Wei Yang <richardw.yang@linux.intel.com>,
	Igor Mammedov <imammedo@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>
Subject: [PATCH PROTOTYPE 3/6] vfio: Implement support for sparse RAM memory regions
Date: Thu, 24 Sep 2020 18:04:20 +0200	[thread overview]
Message-ID: <20200924160423.106747-4-david@redhat.com> (raw)
In-Reply-To: <20200924160423.106747-1-david@redhat.com>

Implement support for sparse RAM, to be used by virtio-mem. Handling
is somewhat similar to memory_region_is_iommu() handling, which also
notifies on changes.

Instead of mapping the whole region, we only map selected pieces (and
unmap previously selected pieces) when notified by the SparseRAMHandler.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Wei Yang <richardw.yang@linux.intel.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 hw/vfio/common.c              | 155 ++++++++++++++++++++++++++++++++++
 include/hw/vfio/vfio-common.h |  12 +++
 2 files changed, 167 insertions(+)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 13471ae294..a3aaf70dd8 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -37,6 +37,7 @@
 #include "sysemu/reset.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "qemu/units.h"
 
 VFIOGroupList vfio_group_list =
     QLIST_HEAD_INITIALIZER(vfio_group_list);
@@ -498,6 +499,143 @@ out:
     rcu_read_unlock();
 }
 
+/*
+ * Notifier callback for sparse RAM changes: map or unmap the intersection
+ * of the notified range [mr_offset, mr_offset + size) with the section we
+ * registered for.  Mapping is done in sram->granularity-sized pieces so
+ * that individual pieces can be unmapped again later.
+ *
+ * Returns 0 on success (unmap failures are reported but not propagated,
+ * matching the void unmap notifier signature), negative errno on map error.
+ */
+static int vfio_sparse_ram_notify(SparseRAMNotifier *n, const MemoryRegion *mr,
+                                  uint64_t mr_offset, uint64_t size,
+                                  bool map)
+{
+    VFIOSparseRAM *sram = container_of(n, VFIOSparseRAM, notifier);
+    /* Clamp the notified range to the part of the MR we actually track. */
+    const hwaddr mr_start = MAX(mr_offset, sram->offset_within_region);
+    const hwaddr mr_end = MIN(mr_offset + size,
+                              sram->offset_within_region + sram->size);
+    const hwaddr iova_start = mr_start + sram->offset_within_address_space;
+    const hwaddr iova_end = mr_end + sram->offset_within_address_space;
+    hwaddr mr_cur, iova_cur, mr_next;
+    void *vaddr;
+    int ret, ret2;
+
+    g_assert(mr == sram->mr);
+
+    /* We get notified about everything, ignore range we don't care about. */
+    if (mr_start >= mr_end) {
+        return 0;
+    }
+
+    /* Unmap everything with a single call. */
+    if (!map) {
+        ret = vfio_dma_unmap(sram->container, iova_start,
+                             iova_end - iova_start);
+        if (ret) {
+            error_report("%s: vfio_dma_unmap() failed: %s", __func__,
+                         strerror(-ret));
+        }
+        return 0;
+    }
+
+    /* TODO: fail early if we would exceed a specified number of mappings. */
+
+    /* Map in (aligned within MR) granularity, so we can unmap later. */
+    for (mr_cur = mr_start; mr_cur < mr_end; mr_cur = mr_next) {
+        iova_cur = mr_cur + sram->offset_within_address_space;
+        mr_next = QEMU_ALIGN_UP(mr_cur + 1, sram->granularity);
+        mr_next = MIN(mr_next, mr_end);
+
+        vaddr = memory_region_get_ram_ptr(sram->mr) + mr_cur;
+        ret = vfio_dma_map(sram->container, iova_cur, mr_next - mr_cur,
+                           vaddr, mr->readonly);
+        if (ret) {
+            /* Rollback in case of error: unmap what we mapped so far. */
+            if (mr_cur != mr_start) {
+                ret2 = vfio_dma_unmap(sram->container, iova_start,
+                                      iova_end - iova_start);
+                if (ret2) {
+                    error_report("%s: rollback vfio_dma_unmap() failed: %s",
+                                 __func__, strerror(-ret2));
+                }
+            }
+            return ret;
+        }
+    }
+    return 0;
+}
+
+/* SparseRAMNotifier map callback: map the newly plugged range. */
+static int vfio_sparse_ram_notify_map(SparseRAMNotifier *n,
+                                      const MemoryRegion *mr,
+                                      uint64_t mr_offset, uint64_t size)
+{
+    return vfio_sparse_ram_notify(n, mr, mr_offset, size, true);
+}
+
+/* SparseRAMNotifier unmap callback: unmap the unplugged range. */
+static void vfio_sparse_ram_notify_unmap(SparseRAMNotifier *n,
+                                         const MemoryRegion *mr,
+                                         uint64_t mr_offset, uint64_t size)
+{
+    vfio_sparse_ram_notify(n, mr, mr_offset, size, false);
+}
+
+/*
+ * Start tracking a sparse RAM section: allocate per-section state, register
+ * the map/unmap notifier with the memory region, and replay the currently
+ * mapped pieces so that the container's DMA mappings reflect the current
+ * state of the region.
+ */
+static void vfio_register_sparse_ram(VFIOContainer *container,
+                                     MemoryRegionSection *section)
+{
+    VFIOSparseRAM *sram;
+    int ret;
+
+    sram = g_new0(VFIOSparseRAM, 1);
+    sram->container = container;
+    sram->mr = section->mr;
+    sram->offset_within_region = section->offset_within_region;
+    sram->offset_within_address_space = section->offset_within_address_space;
+    sram->size = int128_get64(section->size);
+    sram->granularity = memory_region_sparse_ram_get_granularity(section->mr);
+
+    /*
+     * TODO: We usually want a bigger granularity (for a lot of added memory,
+     * as we need quite a lot of mappings) - however, this has to be configured
+     * by the user.
+     */
+    g_assert(sram->granularity >= 1 * MiB &&
+             is_power_of_2(sram->granularity));
+
+    /* Register the notifier */
+    sparse_ram_notifier_init(&sram->notifier, vfio_sparse_ram_notify_map,
+                             vfio_sparse_ram_notify_unmap);
+    memory_region_register_sparse_ram_notifier(section->mr, &sram->notifier);
+    QLIST_INSERT_HEAD(&container->sram_list, sram, next);
+    /*
+     * Replay mapped blocks - if anything goes wrong (only when hotplugging
+     * vfio devices), report the error for now.
+     *
+     * TODO: Can we catch this earlier?
+     */
+    ret = memory_region_sparse_ram_replay_mapped(section->mr, &sram->notifier);
+    if (ret) {
+        error_report("%s: failed to replay mappings: %s", __func__,
+                     strerror(-ret));
+    }
+}
+
+/*
+ * Stop tracking a sparse RAM section: look up the matching entry on the
+ * container's sram_list, unregister the notifier and free the state.
+ * DMA unmapping of the whole section is left to the caller.
+ */
+static void vfio_unregister_sparse_ram(VFIOContainer *container,
+                                       MemoryRegionSection *section)
+{
+    VFIOSparseRAM *sram = NULL;
+
+    /* QLIST_FOREACH leaves sram == NULL if the loop completes without break. */
+    QLIST_FOREACH(sram, &container->sram_list, next) {
+        if (sram->mr == section->mr &&
+            sram->offset_within_region == section->offset_within_region &&
+            sram->offset_within_address_space ==
+            section->offset_within_address_space) {
+            break;
+        }
+    }
+
+    if (!sram) {
+        hw_error("vfio: Trying to unregister non-existent sparse RAM");
+    }
+
+    memory_region_unregister_sparse_ram_notifier(section->mr, &sram->notifier);
+    QLIST_REMOVE(sram, next);
+    g_free(sram);
+    /* The caller is expected to vfio_dma_unmap(). */
+}
+
 static void vfio_listener_region_add(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
@@ -650,6 +788,15 @@ static void vfio_listener_region_add(MemoryListener *listener,
 
     /* Here we assume that memory_region_is_ram(section->mr)==true */
 
+    /*
+     * For sparse RAM, we only want to register the actually mapped
+     * pieces - and update the mapping whenever we're notified about changes.
+     */
+    if (memory_region_is_sparse_ram(section->mr)) {
+        vfio_register_sparse_ram(container, section);
+        return;
+    }
+
     vaddr = memory_region_get_ram_ptr(section->mr) +
             section->offset_within_region +
             (iova - section->offset_within_address_space);
@@ -786,6 +933,13 @@ static void vfio_listener_region_del(MemoryListener *listener,
 
         pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
         try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
+    } else if (memory_region_is_sparse_ram(section->mr)) {
+        vfio_unregister_sparse_ram(container, section);
+        /*
+         * We rely on a single vfio_dma_unmap() call below to clean the whole
+         * region.
+         */
+        try_unmap = true;
     }
 
     if (try_unmap) {
@@ -1275,6 +1429,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
     container->error = NULL;
     QLIST_INIT(&container->giommu_list);
     QLIST_INIT(&container->hostwin_list);
+    QLIST_INIT(&container->sram_list);
 
     ret = vfio_init_container(container, group->fd, errp);
     if (ret) {
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index c78f3ff559..dfa18dbd8e 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -77,6 +77,7 @@ typedef struct VFIOContainer {
     QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
     QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
     QLIST_HEAD(, VFIOGroup) group_list;
+    QLIST_HEAD(, VFIOSparseRAM) sram_list;
     QLIST_ENTRY(VFIOContainer) next;
 } VFIOContainer;
 
@@ -88,6 +89,17 @@ typedef struct VFIOGuestIOMMU {
     QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
 } VFIOGuestIOMMU;
 
+/* Per-container state for one tracked sparse RAM memory region section. */
+typedef struct VFIOSparseRAM {
+    VFIOContainer *container;
+    MemoryRegion *mr;
+    /* Offset of the tracked section within the memory region. */
+    hwaddr offset_within_region;
+    /* Offset of the tracked section within the address space (iova base). */
+    hwaddr offset_within_address_space;
+    /* Size of the tracked section. */
+    hwaddr size;
+    /* (Un)mapping granularity: power of two, asserted >= 1 MiB. */
+    uint64_t granularity;
+    /* Notifier registered with the sparse memory region. */
+    SparseRAMNotifier notifier;
+    QLIST_ENTRY(VFIOSparseRAM) next;
+} VFIOSparseRAM;
+
 typedef struct VFIOHostDMAWindow {
     hwaddr min_iova;
     hwaddr max_iova;
-- 
2.26.2



  parent reply	other threads:[~2020-09-24 16:42 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-24 16:04 [PATCH PROTOTYPE 0/6] virtio-mem: vfio support David Hildenbrand
2020-09-24 16:04 ` [PATCH PROTOTYPE 1/6] memory: Introduce sparse RAM handler for memory regions David Hildenbrand
2020-10-20 19:24   ` Peter Xu
2020-10-20 20:13     ` David Hildenbrand
2020-09-24 16:04 ` [PATCH PROTOTYPE 2/6] virtio-mem: Impelement SparseRAMHandler interface David Hildenbrand
2020-09-24 16:04 ` David Hildenbrand [this message]
2020-10-20 19:44   ` [PATCH PROTOTYPE 3/6] vfio: Implement support for sparse RAM memory regions Peter Xu
2020-10-20 20:01     ` David Hildenbrand
2020-10-20 20:44       ` Peter Xu
2020-11-12 10:11         ` David Hildenbrand
2020-11-18 13:04         ` David Hildenbrand
2020-11-18 15:23           ` Peter Xu
2020-11-18 16:14             ` David Hildenbrand
2020-11-18 17:01               ` Peter Xu
2020-11-18 17:37                 ` David Hildenbrand
2020-11-18 19:05                   ` Peter Xu
2020-11-18 19:20                     ` David Hildenbrand
2020-09-24 16:04 ` [PATCH PROTOTYPE 4/6] memory: Extend ram_block_discard_(require|disable) by two discard types David Hildenbrand
2020-10-20 19:17   ` Peter Xu
2020-10-20 19:58     ` David Hildenbrand
2020-10-20 20:49       ` Peter Xu
2020-10-20 21:30         ` Peter Xu
2020-09-24 16:04 ` [PATCH PROTOTYPE 5/6] virtio-mem: Require only RAM_BLOCK_DISCARD_T_COORDINATED discards David Hildenbrand
2020-09-24 16:04 ` [PATCH PROTOTYPE 6/6] vfio: Disable only RAM_BLOCK_DISCARD_T_UNCOORDINATED discards David Hildenbrand
2020-09-24 19:30 ` [PATCH PROTOTYPE 0/6] virtio-mem: vfio support no-reply
2020-09-29 17:02 ` Dr. David Alan Gilbert
2020-09-29 17:05   ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200924160423.106747-4-david@redhat.com \
    --to=david@redhat.com \
    --cc=alex.williamson@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=imammedo@redhat.com \
    --cc=lcapitulino@redhat.com \
    --cc=mst@redhat.com \
    --cc=pankaj.gupta.linux@gmail.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richardw.yang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.