All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PULL 0/7] vhost, pc features, fixes
@ 2015-10-16  8:49 Michael S. Tsirkin
  2015-10-16  8:49 ` [Qemu-devel] [PULL 1/7] intel_iommu: Add support for translation for devices behind bridges Michael S. Tsirkin
                   ` (7 more replies)
  0 siblings, 8 replies; 10+ messages in thread
From: Michael S. Tsirkin @ 2015-10-16  8:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell

The following changes since commit b37686f7e84b22cfaf7fd01ac5133f2617cc3027:

  Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging (2015-10-09 12:18:14 +0100)

are available in the git repository at:

  git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to 7f31381190755a1aa22c5cd486723b674ab83369:

  vhost: fail backend intialization early (2015-10-16 11:44:15 +0300)

----------------------------------------------------------------
vhost, pc features, fixes

New features:
    VT-d support for devices behind a bridge

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

----------------------------------------------------------------
Igor Mammedov (3):
      vhost: add vhost_has_free_slot() interface
      pc-dimm: add vhost slots limit check before commiting to hotplug
      vhost: fail backend intialization early

Knut Omang (1):
      intel_iommu: Add support for translation for devices behind bridges

Marc-André Lureau (1):
      tests: re-enable vhost-user-test

Michael S. Tsirkin (2):
      exec: factor out duplicate mmap code
      net: don't set native endianness

 configure                         |  1 +
 include/hw/i386/intel_iommu.h     | 16 ++++++-
 include/hw/virtio/vhost-backend.h |  2 +
 include/hw/virtio/vhost.h         |  2 +
 include/qemu/mmap-alloc.h         | 10 +++++
 exec.c                            | 47 +++++----------------
 hw/i386/intel_iommu.c             | 89 ++++++++++++++++++++++++++++++++-------
 hw/mem/pc-dimm.c                  |  7 +++
 hw/pci-host/q35.c                 | 25 ++---------
 hw/virtio/vhost-backend.c         | 19 +++++++++
 hw/virtio/vhost-user.c            |  6 +++
 hw/virtio/vhost.c                 | 27 ++++++++++++
 net/net.c                         |  8 ++++
 stubs/vhost.c                     |  6 +++
 util/mmap-alloc.c                 | 71 +++++++++++++++++++++++++++++++
 util/oslib-posix.c                | 28 ++----------
 stubs/Makefile.objs               |  1 +
 tests/Makefile                    |  4 +-
 util/Makefile.objs                |  1 +
 19 files changed, 266 insertions(+), 104 deletions(-)
 create mode 100644 include/qemu/mmap-alloc.h
 create mode 100644 stubs/vhost.c
 create mode 100644 util/mmap-alloc.c

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PULL 1/7] intel_iommu: Add support for translation for devices behind bridges
  2015-10-16  8:49 [Qemu-devel] [PULL 0/7] vhost, pc features, fixes Michael S. Tsirkin
@ 2015-10-16  8:49 ` Michael S. Tsirkin
  2015-10-16  8:49 ` [Qemu-devel] [PULL 2/7] exec: factor out duplicate mmap code Michael S. Tsirkin
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Michael S. Tsirkin @ 2015-10-16  8:49 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Richard Henderson, Knut Omang, Eduardo Habkost,
	Paolo Bonzini

From: Knut Omang <knut.omang@oracle.com>

- Use a hash table indexed on bus pointers to store information about buses
  instead of using the bus numbers.
  Bus pointers are stored in a new VTDBus struct together with the vector
  of device address space pointers indexed by devfn.
- The bus number is still used for lookup for selective SID based invalidate,
  in which case the bus number is lazily resolved from the bus hash table and
  cached in a separate index.

Signed-off-by: Knut Omang <knut.omang@oracle.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/i386/intel_iommu.h | 16 +++++++-
 hw/i386/intel_iommu.c         | 89 +++++++++++++++++++++++++++++++++++--------
 hw/pci-host/q35.c             | 25 ++----------
 3 files changed, 90 insertions(+), 40 deletions(-)

diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index e321ee4..5dbadb7 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -49,6 +49,7 @@ typedef struct VTDContextCacheEntry VTDContextCacheEntry;
 typedef struct IntelIOMMUState IntelIOMMUState;
 typedef struct VTDAddressSpace VTDAddressSpace;
 typedef struct VTDIOTLBEntry VTDIOTLBEntry;
+typedef struct VTDBus VTDBus;
 
 /* Context-Entry */
 struct VTDContextEntry {
@@ -65,7 +66,7 @@ struct VTDContextCacheEntry {
 };
 
 struct VTDAddressSpace {
-    uint8_t bus_num;
+    PCIBus *bus;
     uint8_t devfn;
     AddressSpace as;
     MemoryRegion iommu;
@@ -73,6 +74,11 @@ struct VTDAddressSpace {
     VTDContextCacheEntry context_cache_entry;
 };
 
+struct VTDBus {
+    PCIBus* bus;		/* A reference to the bus to provide translation for */
+    VTDAddressSpace *dev_as[0];	/* A table of VTDAddressSpace objects indexed by devfn */
+};
+
 struct VTDIOTLBEntry {
     uint64_t gfn;
     uint16_t domain_id;
@@ -114,7 +120,13 @@ struct IntelIOMMUState {
     GHashTable *iotlb;              /* IOTLB */
 
     MemoryRegionIOMMUOps iommu_ops;
-    VTDAddressSpace **address_spaces[VTD_PCI_BUS_MAX];
+    GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */
+    VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
 };
 
+/* Find the VTD Address space associated with the given bus pointer,
+ * create a new one if none exists
+ */
+VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn);
+
 #endif
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 08055a8..3fe27fa 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -22,6 +22,7 @@
 #include "hw/sysbus.h"
 #include "exec/address-spaces.h"
 #include "intel_iommu_internal.h"
+#include "hw/pci/pci.h"
 
 /*#define DEBUG_INTEL_IOMMU*/
 #ifdef DEBUG_INTEL_IOMMU
@@ -166,19 +167,17 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
  */
 static void vtd_reset_context_cache(IntelIOMMUState *s)
 {
-    VTDAddressSpace **pvtd_as;
     VTDAddressSpace *vtd_as;
-    uint32_t bus_it;
+    VTDBus *vtd_bus;
+    GHashTableIter bus_it;
     uint32_t devfn_it;
 
+    g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
+
     VTD_DPRINTF(CACHE, "global context_cache_gen=1");
-    for (bus_it = 0; bus_it < VTD_PCI_BUS_MAX; ++bus_it) {
-        pvtd_as = s->address_spaces[bus_it];
-        if (!pvtd_as) {
-            continue;
-        }
+    while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
         for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
-            vtd_as = pvtd_as[devfn_it];
+            vtd_as = vtd_bus->dev_as[devfn_it];
             if (!vtd_as) {
                 continue;
             }
@@ -754,12 +753,13 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
  * @is_write: The access is a write operation
  * @entry: IOMMUTLBEntry that contain the addr to be translated and result
  */
-static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num,
+static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
                                    uint8_t devfn, hwaddr addr, bool is_write,
                                    IOMMUTLBEntry *entry)
 {
     IntelIOMMUState *s = vtd_as->iommu_state;
     VTDContextEntry ce;
+    uint8_t bus_num = pci_bus_num(bus);
     VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
     uint64_t slpte;
     uint32_t level;
@@ -874,6 +874,29 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
     }
 }
 
+
+/* Find the VTD address space currently associated with a given bus number,
+ */
+static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
+{
+    VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
+    if (!vtd_bus) {
+        /* Iterate over the registered buses to find the one
+         * which currently hold this bus number, and update the bus_num lookup table:
+         */
+        GHashTableIter iter;
+
+        g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
+        while (g_hash_table_iter_next (&iter, NULL, (void**)&vtd_bus)) {
+            if (pci_bus_num(vtd_bus->bus) == bus_num) {
+                s->vtd_as_by_bus_num[bus_num] = vtd_bus;
+                return vtd_bus;
+            }
+        }
+    }
+    return vtd_bus;
+}
+
 /* Do a context-cache device-selective invalidation.
  * @func_mask: FM field after shifting
  */
@@ -882,7 +905,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
                                           uint16_t func_mask)
 {
     uint16_t mask;
-    VTDAddressSpace **pvtd_as;
+    VTDBus *vtd_bus;
     VTDAddressSpace *vtd_as;
     uint16_t devfn;
     uint16_t devfn_it;
@@ -903,11 +926,11 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
     }
     VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16
                     " mask %"PRIu16, source_id, mask);
-    pvtd_as = s->address_spaces[VTD_SID_TO_BUS(source_id)];
-    if (pvtd_as) {
+    vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
+    if (vtd_bus) {
         devfn = VTD_SID_TO_DEVFN(source_id);
         for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
-            vtd_as = pvtd_as[devfn_it];
+            vtd_as = vtd_bus->dev_as[devfn_it];
             if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
                 VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16,
                             devfn_it);
@@ -1805,11 +1828,11 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
         return ret;
     }
 
-    vtd_do_iommu_translate(vtd_as, vtd_as->bus_num, vtd_as->devfn, addr,
+    vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr,
                            is_write, &ret);
     VTD_DPRINTF(MMU,
                 "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
-                " gpa 0x%"PRIx64 " hpa 0x%"PRIx64, vtd_as->bus_num,
+                " gpa 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
                 VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn),
                 vtd_as->devfn, addr, ret.translated_addr);
     return ret;
@@ -1839,6 +1862,38 @@ static Property vtd_properties[] = {
     DEFINE_PROP_END_OF_LIST(),
 };
 
+
+VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
+{
+    uintptr_t key = (uintptr_t)bus;
+    VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
+    VTDAddressSpace *vtd_dev_as;
+
+    if (!vtd_bus) {
+        /* No corresponding free() */
+        vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX);
+        vtd_bus->bus = bus;
+        key = (uintptr_t)bus;
+        g_hash_table_insert(s->vtd_as_by_busptr, &key, vtd_bus);
+    }
+
+    vtd_dev_as = vtd_bus->dev_as[devfn];
+
+    if (!vtd_dev_as) {
+        vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
+
+        vtd_dev_as->bus = bus;
+        vtd_dev_as->devfn = (uint8_t)devfn;
+        vtd_dev_as->iommu_state = s;
+        vtd_dev_as->context_cache_entry.context_cache_gen = 0;
+        memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s),
+                                 &s->iommu_ops, "intel_iommu", UINT64_MAX);
+        address_space_init(&vtd_dev_as->as,
+                           &vtd_dev_as->iommu, "intel_iommu");
+    }
+    return vtd_dev_as;
+}
+
 /* Do the initialization. It will also be called when reset, so pay
  * attention when adding new initialization stuff.
  */
@@ -1931,13 +1986,15 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
 
     VTD_DPRINTF(GENERAL, "");
-    memset(s->address_spaces, 0, sizeof(s->address_spaces));
+    memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
     memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
                           "intel_iommu", DMAR_REG_SIZE);
     sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
     /* No corresponding destroy */
     s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
                                      g_free, g_free);
+    s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
+                                              g_free, g_free);
     vtd_init(s);
 }
 
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index bd74094..c81507d 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -426,31 +426,12 @@ static void mch_reset(DeviceState *qdev)
 static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 {
     IntelIOMMUState *s = opaque;
-    VTDAddressSpace **pvtd_as;
-    int bus_num = pci_bus_num(bus);
+    VTDAddressSpace *vtd_as;
 
-    assert(0 <= bus_num && bus_num <= VTD_PCI_BUS_MAX);
     assert(0 <= devfn && devfn <= VTD_PCI_DEVFN_MAX);
 
-    pvtd_as = s->address_spaces[bus_num];
-    if (!pvtd_as) {
-        /* No corresponding free() */
-        pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX);
-        s->address_spaces[bus_num] = pvtd_as;
-    }
-    if (!pvtd_as[devfn]) {
-        pvtd_as[devfn] = g_malloc0(sizeof(VTDAddressSpace));
-
-        pvtd_as[devfn]->bus_num = (uint8_t)bus_num;
-        pvtd_as[devfn]->devfn = (uint8_t)devfn;
-        pvtd_as[devfn]->iommu_state = s;
-        pvtd_as[devfn]->context_cache_entry.context_cache_gen = 0;
-        memory_region_init_iommu(&pvtd_as[devfn]->iommu, OBJECT(s),
-                                 &s->iommu_ops, "intel_iommu", UINT64_MAX);
-        address_space_init(&pvtd_as[devfn]->as,
-                           &pvtd_as[devfn]->iommu, "intel_iommu");
-    }
-    return &pvtd_as[devfn]->as;
+    vtd_as = vtd_find_add_as(s, bus, devfn);
+    return &vtd_as->as;
 }
 
 static void mch_init_dmar(MCHPCIState *mch)
-- 
MST

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PULL 2/7] exec: factor out duplicate mmap code
  2015-10-16  8:49 [Qemu-devel] [PULL 0/7] vhost, pc features, fixes Michael S. Tsirkin
  2015-10-16  8:49 ` [Qemu-devel] [PULL 1/7] intel_iommu: Add support for translation for devices behind bridges Michael S. Tsirkin
@ 2015-10-16  8:49 ` Michael S. Tsirkin
  2015-10-25 15:04   ` Paolo Bonzini
  2015-10-16  8:49 ` [Qemu-devel] [PULL 3/7] net: don't set native endianness Michael S. Tsirkin
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 10+ messages in thread
From: Michael S. Tsirkin @ 2015-10-16  8:49 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?=, Paolo Bonzini

Anonymous and file-backed RAM allocation are now almost exactly the same.

Reduce code duplication by moving RAM mmap code out of oslib-posix.c and
exec.c.

Reported-by: Marc-André Lureau <mlureau@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/qemu/mmap-alloc.h | 10 +++++++
 exec.c                    | 47 +++++++------------------------
 util/mmap-alloc.c         | 71 +++++++++++++++++++++++++++++++++++++++++++++++
 util/oslib-posix.c        | 28 +++----------------
 util/Makefile.objs        |  1 +
 5 files changed, 96 insertions(+), 61 deletions(-)
 create mode 100644 include/qemu/mmap-alloc.h
 create mode 100644 util/mmap-alloc.c

diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
new file mode 100644
index 0000000..56388e6
--- /dev/null
+++ b/include/qemu/mmap-alloc.h
@@ -0,0 +1,10 @@
+#ifndef QEMU_MMAP_ALLOC
+#define QEMU_MMAP_ALLOC
+
+#include "qemu-common.h"
+
+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
+
+void qemu_ram_munmap(void *ptr, size_t size);
+
+#endif
diff --git a/exec.c b/exec.c
index 7d90a52..4505dc7 100644
--- a/exec.c
+++ b/exec.c
@@ -55,6 +55,9 @@
 #include "exec/ram_addr.h"
 
 #include "qemu/range.h"
+#ifndef _WIN32
+#include "qemu/mmap-alloc.h"
+#endif
 
 //#define DEBUG_SUBPAGE
 
@@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned;
  */
 #define RAM_RESIZEABLE (1 << 2)
 
-/* An extra page is mapped on top of this RAM.
+/* RAM is backed by an mmapped file.
  */
-#define RAM_EXTRA (1 << 3)
+#define RAM_FILE (1 << 3)
 #endif
 
 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
@@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block,
     char *filename;
     char *sanitized_name;
     char *c;
-    void *ptr;
-    void *area = NULL;
+    void *area;
     int fd;
     uint64_t hpagesize;
-    uint64_t total;
     Error *local_err = NULL;
-    size_t offset;
 
     hpagesize = gethugepagesize(path, &local_err);
     if (local_err) {
@@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block,
     g_free(filename);
 
     memory = ROUND_UP(memory, hpagesize);
-    total = memory + hpagesize;
 
     /*
      * ftruncate is not supported by hugetlbfs in older
@@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block,
         perror("ftruncate");
     }
 
-    ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
-                -1, 0);
-    if (ptr == MAP_FAILED) {
-        error_setg_errno(errp, errno,
-                         "unable to allocate memory range for hugepages");
-        close(fd);
-        goto error;
-    }
-
-    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
-
-    area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
-                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
-                MAP_FIXED,
-                fd, 0);
+    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
                          "unable to map backing store for hugepages");
-        munmap(ptr, total);
         close(fd);
         goto error;
     }
 
-    if (offset > 0) {
-        munmap(ptr, offset);
-    }
-    ptr += offset;
-    total -= offset;
-
-    if (total > memory + getpagesize()) {
-        munmap(ptr + memory + getpagesize(),
-               total - memory - getpagesize());
-    }
-
     if (mem_prealloc) {
         os_mem_prealloc(fd, area, memory);
     }
@@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
     new_block->used_length = size;
     new_block->max_length = size;
     new_block->flags = share ? RAM_SHARED : 0;
-    new_block->flags |= RAM_EXTRA;
+    new_block->flags |= RAM_FILE;
     new_block->host = file_ram_alloc(new_block, size,
                                      mem_path, errp);
     if (!new_block->host) {
@@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block)
         xen_invalidate_map_cache_entry(block->host);
 #ifndef _WIN32
     } else if (block->fd >= 0) {
-        if (block->flags & RAM_EXTRA) {
-            munmap(block->host, block->max_length + getpagesize());
+        if (block->flags & RAM_FILE) {
+            qemu_ram_munmap(block->host, block->max_length);
         } else {
             munmap(block->host, block->max_length);
         }
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
new file mode 100644
index 0000000..1394269
--- /dev/null
+++ b/util/mmap-alloc.c
@@ -0,0 +1,71 @@
+/*
+ * Support for RAM backed by mmaped host memory.
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+#include <qemu/mmap-alloc.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <assert.h>
+
+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
+{
+    /*
+     * Note: this always allocates at least one extra page of virtual address
+     * space, even if size is already aligned.
+     */
+    size_t total = size + align;
+    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
+    void *ptr1;
+
+    if (ptr == MAP_FAILED) {
+        return NULL;
+    }
+
+    /* Make sure align is a power of 2 */
+    assert(!(align & (align - 1)));
+    /* Always align to host page size */
+    assert(align >= getpagesize());
+
+    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
+                MAP_FIXED |
+                (fd == -1 ? MAP_ANONYMOUS : 0) |
+                (shared ? MAP_SHARED : MAP_PRIVATE),
+                fd, 0);
+    if (ptr1 == MAP_FAILED) {
+        munmap(ptr, total);
+        return NULL;
+    }
+
+    ptr += offset;
+    total -= offset;
+
+    if (offset > 0) {
+        munmap(ptr - offset, offset);
+    }
+
+    /*
+     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
+     * a guard page guarding against potential buffer overflows.
+     */
+    if (total > size + getpagesize()) {
+        munmap(ptr + size + getpagesize(), total - size - getpagesize());
+    }
+
+    return ptr;
+}
+
+void qemu_ram_munmap(void *ptr, size_t size)
+{
+    if (ptr) {
+        /* Unmap both the RAM block and the guard page */
+        munmap(ptr, size + getpagesize());
+    }
+}
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index a0fcdc2..892d2d8 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -72,6 +72,8 @@ extern int daemon(int, int);
 #include <sys/sysctl.h>
 #endif
 
+#include <qemu/mmap-alloc.h>
+
 int qemu_get_thread_id(void)
 {
 #if defined(__linux__)
@@ -128,10 +130,7 @@ void *qemu_memalign(size_t alignment, size_t size)
 void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
 {
     size_t align = QEMU_VMALLOC_ALIGN;
-    size_t total = size + align;
-    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
-    void *ptr1;
+    void *ptr = qemu_ram_mmap(-1, size, align, false);
 
     if (ptr == MAP_FAILED) {
         return NULL;
@@ -141,23 +140,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
         *alignment = align;
     }
 
-    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
-                MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-    if (ptr1 == MAP_FAILED) {
-        munmap(ptr, total);
-        return NULL;
-    }
-
-    ptr += offset;
-    total -= offset;
-
-    if (offset > 0) {
-        munmap(ptr - offset, offset);
-    }
-    if (total > size + getpagesize()) {
-        munmap(ptr + size + getpagesize(), total - size - getpagesize());
-    }
-
     trace_qemu_anon_ram_alloc(size, ptr);
     return ptr;
 }
@@ -171,9 +153,7 @@ void qemu_vfree(void *ptr)
 void qemu_anon_ram_free(void *ptr, size_t size)
 {
     trace_qemu_anon_ram_free(ptr, size);
-    if (ptr) {
-        munmap(ptr, size + getpagesize());
-    }
+    qemu_ram_munmap(ptr, size);
 }
 
 void qemu_set_block(int fd)
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 114d657..f011b81 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -1,6 +1,7 @@
 util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
 util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o
 util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o
+util-obj-$(CONFIG_POSIX) += mmap-alloc.o
 util-obj-y += envlist.o path.o module.o
 util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o
 util-obj-y += bitmap.o bitops.o hbitmap.o
-- 
MST

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PULL 3/7] net: don't set native endianness
  2015-10-16  8:49 [Qemu-devel] [PULL 0/7] vhost, pc features, fixes Michael S. Tsirkin
  2015-10-16  8:49 ` [Qemu-devel] [PULL 1/7] intel_iommu: Add support for translation for devices behind bridges Michael S. Tsirkin
  2015-10-16  8:49 ` [Qemu-devel] [PULL 2/7] exec: factor out duplicate mmap code Michael S. Tsirkin
@ 2015-10-16  8:49 ` Michael S. Tsirkin
  2015-10-16  8:49 ` [Qemu-devel] [PULL 4/7] tests: re-enable vhost-user-test Michael S. Tsirkin
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Michael S. Tsirkin @ 2015-10-16  8:49 UTC (permalink / raw)
  To: qemu-devel
  Cc: Marcel Apfelbaum, Peter Maydell, Jason Wang, qemu-stable, Greg Kurz

commit 5be7d9f1b1452613b95c6ba70b8d7ad3d0797991
    vhost-net: tell tap backend about the vnet endianness
makes vhost net always try to set LE - even if that matches the
native endian-ness.

This makes it fail on older kernels on x86 without TUNSETVNETLE support.

To fix, make qemu_set_vnet_le/qemu_set_vnet_be skip the
ioctl if it matches the host endian-ness.

Reported-by: Marcel Apfelbaum <marcel@redhat.com>
Cc: Greg Kurz <gkurz@linux.vnet.ibm.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
---
 net/net.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/net.c b/net/net.c
index 28a5597..8e96011 100644
--- a/net/net.c
+++ b/net/net.c
@@ -517,20 +517,28 @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
 
 int qemu_set_vnet_le(NetClientState *nc, bool is_le)
 {
+#ifdef HOST_WORDS_BIGENDIAN
     if (!nc || !nc->info->set_vnet_le) {
         return -ENOSYS;
     }
 
     return nc->info->set_vnet_le(nc, is_le);
+#else
+    return 0;
+#endif
 }
 
 int qemu_set_vnet_be(NetClientState *nc, bool is_be)
 {
+#ifdef HOST_WORDS_BIGENDIAN
+    return 0;
+#else
     if (!nc || !nc->info->set_vnet_be) {
         return -ENOSYS;
     }
 
     return nc->info->set_vnet_be(nc, is_be);
+#endif
 }
 
 int qemu_can_send_packet(NetClientState *sender)
-- 
MST

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PULL 4/7] tests: re-enable vhost-user-test
  2015-10-16  8:49 [Qemu-devel] [PULL 0/7] vhost, pc features, fixes Michael S. Tsirkin
                   ` (2 preceding siblings ...)
  2015-10-16  8:49 ` [Qemu-devel] [PULL 3/7] net: don't set native endianness Michael S. Tsirkin
@ 2015-10-16  8:49 ` Michael S. Tsirkin
  2015-10-16  8:49 ` [Qemu-devel] [PULL 5/7] vhost: add vhost_has_free_slot() interface Michael S. Tsirkin
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Michael S. Tsirkin @ 2015-10-16  8:49 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Michael Tokarev, Markus Armbruster, Gerd Hoffmann,
	Stefan Hajnoczi, =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?=,
	Paolo Bonzini

From: Marc-André Lureau <marcandre.lureau@redhat.com>

Commit 7fe34ca9c2e actually disabled vhost-user-test altogether,
since CONFIG_VHOST_NET is a per-target config variable.

tests/vhost-user-test is already an x86/x86_64 softmmu-specific test; to
enable it correctly, kvm and vhost-net must also be enabled. To check
that, set CONFIG_VHOST_NET_TEST when kvm is also enabled.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 configure      | 1 +
 tests/Makefile | 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index 2d2a498..dde8b66 100755
--- a/configure
+++ b/configure
@@ -5522,6 +5522,7 @@ case "$target_name" in
       echo "CONFIG_KVM=y" >> $config_target_mak
       if test "$vhost_net" = "yes" ; then
         echo "CONFIG_VHOST_NET=y" >> $config_target_mak
+        echo "CONFIG_VHOST_NET_TEST=y" >> $config_host_mak
       fi
     fi
 esac
diff --git a/tests/Makefile b/tests/Makefile
index e6474ba..15f004c 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -188,9 +188,7 @@ gcov-files-i386-y += hw/usb/hcd-xhci.c
 check-qtest-i386-y += tests/pc-cpu-test$(EXESUF)
 check-qtest-i386-y += tests/q35-test$(EXESUF)
 gcov-files-i386-y += hw/pci-host/q35.c
-ifeq ($(CONFIG_VHOST_NET),y)
-check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF)
-endif
+check-qtest-i386-$(CONFIG_VHOST_NET_TEST) += tests/vhost-user-test$(EXESUF)
 check-qtest-x86_64-y = $(check-qtest-i386-y)
 gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
 gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
-- 
MST

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PULL 5/7] vhost: add vhost_has_free_slot() interface
  2015-10-16  8:49 [Qemu-devel] [PULL 0/7] vhost, pc features, fixes Michael S. Tsirkin
                   ` (3 preceding siblings ...)
  2015-10-16  8:49 ` [Qemu-devel] [PULL 4/7] tests: re-enable vhost-user-test Michael S. Tsirkin
@ 2015-10-16  8:49 ` Michael S. Tsirkin
  2015-10-16  8:49 ` [Qemu-devel] [PULL 6/7] pc-dimm: add vhost slots limit check before commiting to hotplug Michael S. Tsirkin
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Michael S. Tsirkin @ 2015-10-16  8:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Igor Mammedov

From: Igor Mammedov <imammedo@redhat.com>

It will allow other parts of QEMU to check whether it's safe
to map a memory region during hotplug/runtime.
That way the hotplug path will have a chance to cancel the
hotplug operation instead of crashing in vhost_commit().

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/virtio/vhost-backend.h |  2 ++
 include/hw/virtio/vhost.h         |  2 ++
 hw/virtio/vhost-backend.c         | 19 +++++++++++++++++++
 hw/virtio/vhost-user.c            |  6 ++++++
 hw/virtio/vhost.c                 | 21 +++++++++++++++++++++
 stubs/vhost.c                     |  6 ++++++
 stubs/Makefile.objs               |  1 +
 7 files changed, 57 insertions(+)
 create mode 100644 stubs/vhost.c

diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index 3a0f6e2..7d4a8ad 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -26,6 +26,7 @@ typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque);
 typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev);
 typedef int (*vhost_backend_get_vq_index)(struct vhost_dev *dev, int idx);
 typedef int (*vhost_backend_set_vring_enable)(struct vhost_dev *dev, int enable);
+typedef int (*vhost_backend_memslots_limit)(struct vhost_dev *dev);
 
 typedef struct VhostOps {
     VhostBackendType backend_type;
@@ -34,6 +35,7 @@ typedef struct VhostOps {
     vhost_backend_cleanup vhost_backend_cleanup;
     vhost_backend_get_vq_index vhost_backend_get_vq_index;
     vhost_backend_set_vring_enable vhost_backend_set_vring_enable;
+    vhost_backend_memslots_limit vhost_backend_memslots_limit;
 } VhostOps;
 
 extern const VhostOps user_ops;
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index c3758f3..080831e 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -59,6 +59,7 @@ struct vhost_dev {
     const VhostOps *vhost_ops;
     void *opaque;
     struct vhost_log *log;
+    QLIST_ENTRY(vhost_dev) entry;
 };
 
 int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
@@ -83,4 +84,5 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
                             uint64_t features);
 void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
                         uint64_t features);
+bool vhost_has_free_slot(void);
 #endif
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index 72d1392..910d8e0 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -11,6 +11,7 @@
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-backend.h"
 #include "qemu/error-report.h"
+#include "linux/vhost.h"
 
 #include <sys/ioctl.h>
 
@@ -49,12 +50,30 @@ static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx)
     return idx - dev->vq_index;
 }
 
+static int vhost_kernel_memslots_limit(struct vhost_dev *dev)
+{
+    int limit = 64;
+    char *s;
+
+    if (g_file_get_contents("/sys/module/vhost/parameters/max_mem_regions",
+                            &s, NULL, NULL)) {
+        uint64_t val = g_ascii_strtoull(s, NULL, 10);
+        if (!((val == G_MAXUINT64 || !val) && errno)) {
+            return val;
+        }
+        error_report("ignoring invalid max_mem_regions value in vhost module:"
+                     " %s", s);
+    }
+    return limit;
+}
+
 static const VhostOps kernel_ops = {
         .backend_type = VHOST_BACKEND_TYPE_KERNEL,
         .vhost_call = vhost_kernel_call,
         .vhost_backend_init = vhost_kernel_init,
         .vhost_backend_cleanup = vhost_kernel_cleanup,
         .vhost_backend_get_vq_index = vhost_kernel_get_vq_index,
+        .vhost_backend_memslots_limit = vhost_kernel_memslots_limit,
 };
 
 int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index b11c0d2..9585637 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -440,6 +440,11 @@ static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
     return idx;
 }
 
+static int vhost_user_memslots_limit(struct vhost_dev *dev)
+{
+    return VHOST_MEMORY_MAX_NREGIONS;
+}
+
 const VhostOps user_ops = {
         .backend_type = VHOST_BACKEND_TYPE_USER,
         .vhost_call = vhost_user_call,
@@ -447,4 +452,5 @@ const VhostOps user_ops = {
         .vhost_backend_cleanup = vhost_user_cleanup,
         .vhost_backend_get_vq_index = vhost_user_get_vq_index,
         .vhost_backend_set_vring_enable = vhost_user_set_vring_enable,
+        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
 };
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index c0ed5b2..a3b4f9e 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -26,6 +26,22 @@
 
 static struct vhost_log *vhost_log;
 
+static unsigned int used_memslots;
+static QLIST_HEAD(, vhost_dev) vhost_devices =
+    QLIST_HEAD_INITIALIZER(vhost_devices);
+
+bool vhost_has_free_slot(void)
+{
+    unsigned int slots_limit = ~0U;
+    struct vhost_dev *hdev;
+
+    QLIST_FOREACH(hdev, &vhost_devices, entry) {
+        unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
+        slots_limit = MIN(slots_limit, r);
+    }
+    return slots_limit > used_memslots;
+}
+
 static void vhost_dev_sync_region(struct vhost_dev *dev,
                                   MemoryRegionSection *section,
                                   uint64_t mfirst, uint64_t mlast,
@@ -457,6 +473,7 @@ static void vhost_set_memory(MemoryListener *listener,
     dev->mem_changed_start_addr = MIN(dev->mem_changed_start_addr, start_addr);
     dev->mem_changed_end_addr = MAX(dev->mem_changed_end_addr, start_addr + size - 1);
     dev->memory_changed = true;
+    used_memslots = dev->mem->nregions;
 }
 
 static bool vhost_section(MemoryRegionSection *section)
@@ -916,6 +933,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
         return -errno;
     }
 
+    QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
+
     r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL);
     if (r < 0) {
         goto fail;
@@ -972,6 +991,7 @@ fail_vq:
 fail:
     r = -errno;
     hdev->vhost_ops->vhost_backend_cleanup(hdev);
+    QLIST_REMOVE(hdev, entry);
     return r;
 }
 
@@ -989,6 +1009,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
     g_free(hdev->mem);
     g_free(hdev->mem_sections);
     hdev->vhost_ops->vhost_backend_cleanup(hdev);
+    QLIST_REMOVE(hdev, entry);
 }
 
 /* Stop processing guest IO notifications in qemu.
diff --git a/stubs/vhost.c b/stubs/vhost.c
new file mode 100644
index 0000000..d346b85
--- /dev/null
+++ b/stubs/vhost.c
@@ -0,0 +1,6 @@
+#include "hw/virtio/vhost.h"
+
+bool vhost_has_free_slot(void)
+{
+    return true;
+}
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index 85e4e81..ce6ce11 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -39,3 +39,4 @@ stub-obj-y += cpus.o
 stub-obj-y += kvm.o
 stub-obj-y += qmp_pc_dimm_device_list.o
 stub-obj-y += target-monitor-defs.o
+stub-obj-y += vhost.o
-- 
MST

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PULL 6/7] pc-dimm: add vhost slots limit check before commiting to hotplug
  2015-10-16  8:49 [Qemu-devel] [PULL 0/7] vhost, pc features, fixes Michael S. Tsirkin
                   ` (4 preceding siblings ...)
  2015-10-16  8:49 ` [Qemu-devel] [PULL 5/7] vhost: add vhost_has_free_slot() interface Michael S. Tsirkin
@ 2015-10-16  8:49 ` Michael S. Tsirkin
  2015-10-16  8:49 ` [Qemu-devel] [PULL 7/7] vhost: fail backend intialization early Michael S. Tsirkin
  2015-10-16 18:11 ` [Qemu-devel] [PULL 0/7] vhost, pc features, fixes Peter Maydell
  7 siblings, 0 replies; 10+ messages in thread
From: Michael S. Tsirkin @ 2015-10-16  8:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Igor Mammedov

From: Igor Mammedov <imammedo@redhat.com>

It allows memory hotplug to be safely cancelled if the vhost backend
doesn't support the necessary number of memory slots, and prevents
QEMU from crashing in vhost due to hitting the vhost limit on the
number of supported memory ranges.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/mem/pc-dimm.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 506fe0d..2bae994 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -25,6 +25,7 @@
 #include "sysemu/numa.h"
 #include "sysemu/kvm.h"
 #include "trace.h"
+#include "hw/virtio/vhost.h"
 
 typedef struct pc_dimms_capacity {
      uint64_t size;
@@ -96,6 +97,12 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
         goto out;
     }
 
+    if (!vhost_has_free_slot()) {
+        error_setg(&local_err, "a used vhost backend has no free"
+                               " memory slots left");
+        goto out;
+    }
+
     memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr);
     vmstate_register_ram(mr, dev);
     numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node);
-- 
MST

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PULL 7/7] vhost: fail backend intialization early
  2015-10-16  8:49 [Qemu-devel] [PULL 0/7] vhost, pc features, fixes Michael S. Tsirkin
                   ` (5 preceding siblings ...)
  2015-10-16  8:49 ` [Qemu-devel] [PULL 6/7] pc-dimm: add vhost slots limit check before commiting to hotplug Michael S. Tsirkin
@ 2015-10-16  8:49 ` Michael S. Tsirkin
  2015-10-16 18:11 ` [Qemu-devel] [PULL 0/7] vhost, pc features, fixes Peter Maydell
  7 siblings, 0 replies; 10+ messages in thread
From: Michael S. Tsirkin @ 2015-10-16  8:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Igor Mammedov

From: Igor Mammedov <imammedo@redhat.com>

Don't initialize vhost backend if memslots number exceeds the supported
limit. This prevents failures down the road when backend
is actually started.

[MST: rewrite commit log]

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/vhost.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index a3b4f9e..f14a5c5 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -933,6 +933,12 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
         return -errno;
     }
 
+    if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
+        fprintf(stderr, "vhost backend memory slots limit is less"
+                " than current number of present memory slots\n");
+        close((uintptr_t)opaque);
+        return -1;
+    }
     QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
 
     r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL);
-- 
MST

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PULL 0/7] vhost, pc features, fixes
  2015-10-16  8:49 [Qemu-devel] [PULL 0/7] vhost, pc features, fixes Michael S. Tsirkin
                   ` (6 preceding siblings ...)
  2015-10-16  8:49 ` [Qemu-devel] [PULL 7/7] vhost: fail backend intialization early Michael S. Tsirkin
@ 2015-10-16 18:11 ` Peter Maydell
  7 siblings, 0 replies; 10+ messages in thread
From: Peter Maydell @ 2015-10-16 18:11 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: QEMU Developers

On 16 October 2015 at 09:49, Michael S. Tsirkin <mst@redhat.com> wrote:
> The following changes since commit b37686f7e84b22cfaf7fd01ac5133f2617cc3027:
>
>   Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging (2015-10-09 12:18:14 +0100)
>
> are available in the git repository at:
>
>   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
>
> for you to fetch changes up to 7f31381190755a1aa22c5cd486723b674ab83369:
>
>   vhost: fail backend intialization early (2015-10-16 11:44:15 +0300)
>
> ----------------------------------------------------------------
> vhost, pc features, fixes
>
> New features:
>     VT-d support for devices behind a bridge
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

Merge conflict in tests/Makefile. Looks trivial but I have a big long
queue to process at the moment, so I'm just going to bounce this one,
I'm afraid.

thanks
-- PMM

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PULL 2/7] exec: factor out duplicate mmap code
  2015-10-16  8:49 ` [Qemu-devel] [PULL 2/7] exec: factor out duplicate mmap code Michael S. Tsirkin
@ 2015-10-25 15:04   ` Paolo Bonzini
  0 siblings, 0 replies; 10+ messages in thread
From: Paolo Bonzini @ 2015-10-25 15:04 UTC (permalink / raw)
  To: Michael S. Tsirkin, qemu-devel; +Cc: Peter Maydell, Marc-André Lureau



On 16/10/2015 10:49, Michael S. Tsirkin wrote:
> Anonymous and file-backed RAM allocation are now almost exactly the same.
> 
> Reduce code duplication by moving RAM mmap code out of oslib-posix.c and
> exec.c.
> 
> Reported-by: Marc-André Lureau <mlureau@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> Acked-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  include/qemu/mmap-alloc.h | 10 +++++++
>  exec.c                    | 47 +++++++------------------------
>  util/mmap-alloc.c         | 71 +++++++++++++++++++++++++++++++++++++++++++++++
>  util/oslib-posix.c        | 28 +++----------------
>  util/Makefile.objs        |  1 +
>  5 files changed, 96 insertions(+), 61 deletions(-)
>  create mode 100644 include/qemu/mmap-alloc.h
>  create mode 100644 util/mmap-alloc.c
> 
> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
> new file mode 100644
> index 0000000..56388e6
> --- /dev/null
> +++ b/include/qemu/mmap-alloc.h
> @@ -0,0 +1,10 @@
> +#ifndef QEMU_MMAP_ALLOC
> +#define QEMU_MMAP_ALLOC
> +
> +#include "qemu-common.h"
> +
> +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
> +
> +void qemu_ram_munmap(void *ptr, size_t size);
> +
> +#endif
> diff --git a/exec.c b/exec.c
> index 7d90a52..4505dc7 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -55,6 +55,9 @@
>  #include "exec/ram_addr.h"
>  
>  #include "qemu/range.h"
> +#ifndef _WIN32
> +#include "qemu/mmap-alloc.h"
> +#endif
>  
>  //#define DEBUG_SUBPAGE
>  
> @@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned;
>   */
>  #define RAM_RESIZEABLE (1 << 2)
>  
> -/* An extra page is mapped on top of this RAM.
> +/* RAM is backed by an mmapped file.
>   */
> -#define RAM_EXTRA (1 << 3)
> +#define RAM_FILE (1 << 3)
>  #endif
>  
>  struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
> @@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block,
>      char *filename;
>      char *sanitized_name;
>      char *c;
> -    void *ptr;
> -    void *area = NULL;
> +    void *area;
>      int fd;
>      uint64_t hpagesize;
> -    uint64_t total;
>      Error *local_err = NULL;
> -    size_t offset;
>  
>      hpagesize = gethugepagesize(path, &local_err);
>      if (local_err) {
> @@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block,
>      g_free(filename);
>  
>      memory = ROUND_UP(memory, hpagesize);
> -    total = memory + hpagesize;
>  
>      /*
>       * ftruncate is not supported by hugetlbfs in older
> @@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block,
>          perror("ftruncate");
>      }
>  
> -    ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
> -                -1, 0);
> -    if (ptr == MAP_FAILED) {
> -        error_setg_errno(errp, errno,
> -                         "unable to allocate memory range for hugepages");
> -        close(fd);
> -        goto error;
> -    }
> -
> -    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
> -
> -    area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
> -                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
> -                MAP_FIXED,
> -                fd, 0);
> +    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
>      if (area == MAP_FAILED) {
>          error_setg_errno(errp, errno,
>                           "unable to map backing store for hugepages");
> -        munmap(ptr, total);
>          close(fd);
>          goto error;
>      }
>  
> -    if (offset > 0) {
> -        munmap(ptr, offset);
> -    }
> -    ptr += offset;
> -    total -= offset;
> -
> -    if (total > memory + getpagesize()) {
> -        munmap(ptr + memory + getpagesize(),
> -               total - memory - getpagesize());
> -    }
> -
>      if (mem_prealloc) {
>          os_mem_prealloc(fd, area, memory);
>      }
> @@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
>      new_block->used_length = size;
>      new_block->max_length = size;
>      new_block->flags = share ? RAM_SHARED : 0;
> -    new_block->flags |= RAM_EXTRA;
> +    new_block->flags |= RAM_FILE;
>      new_block->host = file_ram_alloc(new_block, size,
>                                       mem_path, errp);
>      if (!new_block->host) {
> @@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block)
>          xen_invalidate_map_cache_entry(block->host);
>  #ifndef _WIN32
>      } else if (block->fd >= 0) {
> -        if (block->flags & RAM_EXTRA) {
> -            munmap(block->host, block->max_length + getpagesize());
> +        if (block->flags & RAM_FILE) {
> +            qemu_ram_munmap(block->host, block->max_length);
>          } else {
>              munmap(block->host, block->max_length);
>          }
> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> new file mode 100644
> index 0000000..1394269
> --- /dev/null
> +++ b/util/mmap-alloc.c
> @@ -0,0 +1,71 @@
> +/*
> + * Support for RAM backed by mmaped host memory.
> + *
> + * Copyright (c) 2015 Red Hat, Inc.
> + *
> + * Authors:
> + *  Michael S. Tsirkin <mst@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +#include <qemu/mmap-alloc.h>
> +#include <sys/types.h>
> +#include <sys/mman.h>
> +#include <assert.h>
> +
> +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
> +{
> +    /*
> +     * Note: this always allocates at least one extra page of virtual address
> +     * space, even if size is already aligned.
> +     */
> +    size_t total = size + align;
> +    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
> +    void *ptr1;
> +
> +    if (ptr == MAP_FAILED) {
> +        return NULL;

The existing callers check for MAP_FAILED (and NULL is a valid return
value of mmap), so you should return ptr here instead of NULL.

> +    }
> +
> +    /* Make sure align is a power of 2 */
> +    assert(!(align & (align - 1)));
> +    /* Always align to host page size */
> +    assert(align >= getpagesize());
> +
> +    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
> +                MAP_FIXED |
> +                (fd == -1 ? MAP_ANONYMOUS : 0) |
> +                (shared ? MAP_SHARED : MAP_PRIVATE),
> +                fd, 0);
> +    if (ptr1 == MAP_FAILED) {
> +        munmap(ptr, total);
> +        return NULL;

Same here---return ptr1 instead of NULL.

Paolo

> +    }
> +
> +    ptr += offset;
> +    total -= offset;
> +
> +    if (offset > 0) {
> +        munmap(ptr - offset, offset);
> +    }
> +
> +    /*
> +     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
> +     * a guard page guarding against potential buffer overflows.
> +     */
> +    if (total > size + getpagesize()) {
> +        munmap(ptr + size + getpagesize(), total - size - getpagesize());
> +    }
> +
> +    return ptr;
> +}
> +
> +void qemu_ram_munmap(void *ptr, size_t size)
> +{
> +    if (ptr) {
> +        /* Unmap both the RAM block and the guard page */
> +        munmap(ptr, size + getpagesize());
> +    }
> +}
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index a0fcdc2..892d2d8 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -72,6 +72,8 @@ extern int daemon(int, int);
>  #include <sys/sysctl.h>
>  #endif
>  
> +#include <qemu/mmap-alloc.h>
> +
>  int qemu_get_thread_id(void)
>  {
>  #if defined(__linux__)
> @@ -128,10 +130,7 @@ void *qemu_memalign(size_t alignment, size_t size)
>  void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
>  {
>      size_t align = QEMU_VMALLOC_ALIGN;
> -    size_t total = size + align;
> -    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> -    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
> -    void *ptr1;
> +    void *ptr = qemu_ram_mmap(-1, size, align, false);
>  
>      if (ptr == MAP_FAILED) {
>          return NULL;
> @@ -141,23 +140,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
>          *alignment = align;
>      }
>  
> -    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
> -                MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> -    if (ptr1 == MAP_FAILED) {
> -        munmap(ptr, total);
> -        return NULL;
> -    }
> -
> -    ptr += offset;
> -    total -= offset;
> -
> -    if (offset > 0) {
> -        munmap(ptr - offset, offset);
> -    }
> -    if (total > size + getpagesize()) {
> -        munmap(ptr + size + getpagesize(), total - size - getpagesize());
> -    }
> -
>      trace_qemu_anon_ram_alloc(size, ptr);
>      return ptr;
>  }
> @@ -171,9 +153,7 @@ void qemu_vfree(void *ptr)
>  void qemu_anon_ram_free(void *ptr, size_t size)
>  {
>      trace_qemu_anon_ram_free(ptr, size);
> -    if (ptr) {
> -        munmap(ptr, size + getpagesize());
> -    }
> +    qemu_ram_munmap(ptr, size);
>  }
>  
>  void qemu_set_block(int fd)
> diff --git a/util/Makefile.objs b/util/Makefile.objs
> index 114d657..f011b81 100644
> --- a/util/Makefile.objs
> +++ b/util/Makefile.objs
> @@ -1,6 +1,7 @@
>  util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
>  util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o
>  util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o
> +util-obj-$(CONFIG_POSIX) += mmap-alloc.o
>  util-obj-y += envlist.o path.o module.o
>  util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o
>  util-obj-y += bitmap.o bitops.o hbitmap.o
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-10-25 15:04 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-16  8:49 [Qemu-devel] [PULL 0/7] vhost, pc features, fixes Michael S. Tsirkin
2015-10-16  8:49 ` [Qemu-devel] [PULL 1/7] intel_iommu: Add support for translation for devices behind bridges Michael S. Tsirkin
2015-10-16  8:49 ` [Qemu-devel] [PULL 2/7] exec: factor out duplicate mmap code Michael S. Tsirkin
2015-10-25 15:04   ` Paolo Bonzini
2015-10-16  8:49 ` [Qemu-devel] [PULL 3/7] net: don't set native endianness Michael S. Tsirkin
2015-10-16  8:49 ` [Qemu-devel] [PULL 4/7] tests: re-enable vhost-user-test Michael S. Tsirkin
2015-10-16  8:49 ` [Qemu-devel] [PULL 5/7] vhost: add vhost_has_free_slot() interface Michael S. Tsirkin
2015-10-16  8:49 ` [Qemu-devel] [PULL 6/7] pc-dimm: add vhost slots limit check before commiting to hotplug Michael S. Tsirkin
2015-10-16  8:49 ` [Qemu-devel] [PULL 7/7] vhost: fail backend intialization early Michael S. Tsirkin
2015-10-16 18:11 ` [Qemu-devel] [PULL 0/7] vhost, pc features, fixes Peter Maydell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.