From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alex Williamson Subject: [RFC PATCH 2/2] device-assignment: Count required kvm memory slots Date: Fri, 21 Jan 2011 16:48:42 -0700 Message-ID: <20110121234831.22262.31177.stgit@s20.home> References: <20110121233040.22262.68117.stgit@s20.home> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Cc: alex.williamson@redhat.com, ddutile@redhat.com, mst@redhat.com, avi@redhat.com, chrisw@redhat.com, jan.kiszka@siemens.com To: kvm@vger.kernel.org Return-path: Received: from mx1.redhat.com ([209.132.183.28]:28486 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754462Ab1AUXsp (ORCPT ); Fri, 21 Jan 2011 18:48:45 -0500 In-Reply-To: <20110121233040.22262.68117.stgit@s20.home> Sender: kvm-owner@vger.kernel.org List-ID: Each MMIO PCI BAR of an assigned device is directly mapped via a KVM memory slot to avoid bouncing reads and writes through qemu. KVM only provides a (small) fixed number of these slots and attempting to exceed the unadvertised limit results in an abort. We can't reserve slots, but let's at least try to make an attempt to check whether there are enough available before adding a device. The non-hotplug case is troublesome here because we have no visibility as to what else might make use of these slots, but hasn't yet been mapped. We used to limit the number of devices that could be specified on the commandline using the -pcidevice option. The heuristic here seems to work and provides a similar limit. We can also avoid using these memory slots by allowing devices to bounce mmio access through qemu. This is trivially accomplished by adding a force_slow=on option to pci-assign. Signed-off-by: Alex Williamson --- hw/device-assignment.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++- hw/device-assignment.h | 3 ++ 2 files changed, 61 insertions(+), 1 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index e97f565..0063a11 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -546,7 +546,9 @@ static int assigned_dev_register_regions(PCIRegion *io_regions, ? PCI_BASE_ADDRESS_MEM_PREFETCH : PCI_BASE_ADDRESS_SPACE_MEMORY; - if (cur_region->size & 0xFFF) { + if (pci_dev->features & ASSIGNED_DEVICE_FORCE_SLOW_MASK) { + slow_map = 1; + } else if (cur_region->size & 0xFFF) { fprintf(stderr, "PCI region %d at address 0x%llx " "has size 0x%x, which is not a multiple of 4K. " "You might experience some performance hit " @@ -556,6 +558,10 @@ static int assigned_dev_register_regions(PCIRegion *io_regions, slow_map = 1; } + if (!slow_map) { + pci_dev->slots_needed++; + } + /* map physical memory */ pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; pci_dev->v_addrs[i].u.r_virtbase = mmap(NULL, cur_region->size, @@ -1666,6 +1672,30 @@ static CPUReadMemoryFunc *msix_mmio_read[] = { static int assigned_dev_register_msix_mmio(AssignedDevice *dev) { + int i; + PCIRegion *pci_region = dev->real_device.regions; + + /* Determine if the MSI-X table splits a BAR, requiring the use of + * two memory slots, one to map each remaining part. */ + if (!(dev->features & ASSIGNED_DEVICE_FORCE_SLOW_MASK)) { + for (i = 0; i < dev->real_device.region_number; i++, pci_region++) { + if (!pci_region->valid) { + continue; + } + + if (ranges_overlap(pci_region->base_addr, pci_region->size, + dev->msix_table_addr, 0x1000)) { + target_phys_addr_t offset; + + offset = dev->msix_table_addr - pci_region->base_addr; + if (offset && pci_region->size > offset + 0x1000) { + dev->slots_needed++; + } + break; + } + } + } + dev->msix_table_page = mmap(NULL, 0x1000, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); @@ -1768,6 +1798,31 @@ static int assigned_initfn(struct PCIDevice *pci_dev) if (assigned_dev_register_msix_mmio(dev)) goto assigned_out; + if (!(dev->features & ASSIGNED_DEVICE_FORCE_SLOW_MASK)) { + int free_slots = kvm_free_slots(); + int total_slots = dev->slots_needed; + + if (!dev->dev.qdev.hotplugged) { + AssignedDevice *adev; + + QLIST_FOREACH(adev, &devs, next) { + total_slots += adev->slots_needed; + } + + /* This seems to work, but it's completely heuristically + * determined. Any number of things might make use of kvm + * memory slots before the guest starts mapping memory BARs. + * This is really just a guess. */ + free_slots -= 13; + } + + if (total_slots > free_slots) { + error_report("pci-assign: Out of memory slots, need %d, have %d\n", + total_slots, free_slots); + goto assigned_out; + } + } + assigned_dev_load_option_rom(dev); QLIST_INSERT_HEAD(&devs, dev, next); @@ -1837,6 +1892,8 @@ static PCIDeviceInfo assign_info = { ASSIGNED_DEVICE_USE_IOMMU_BIT, true), DEFINE_PROP_BIT("prefer_msi", AssignedDevice, features, ASSIGNED_DEVICE_PREFER_MSI_BIT, true), + DEFINE_PROP_BIT("force_slow", AssignedDevice, features, + ASSIGNED_DEVICE_FORCE_SLOW_BIT, false), DEFINE_PROP_STRING("configfd", AssignedDevice, configfd_name), DEFINE_PROP_END_OF_LIST(), }, diff --git a/hw/device-assignment.h b/hw/device-assignment.h index c94a730..ad3cc80 100644 --- a/hw/device-assignment.h +++ b/hw/device-assignment.h @@ -76,9 +76,11 @@ typedef struct { #define ASSIGNED_DEVICE_USE_IOMMU_BIT 0 #define ASSIGNED_DEVICE_PREFER_MSI_BIT 1 +#define ASSIGNED_DEVICE_FORCE_SLOW_BIT 2 #define ASSIGNED_DEVICE_USE_IOMMU_MASK (1 << ASSIGNED_DEVICE_USE_IOMMU_BIT) #define ASSIGNED_DEVICE_PREFER_MSI_MASK (1 << ASSIGNED_DEVICE_PREFER_MSI_BIT) +#define ASSIGNED_DEVICE_FORCE_SLOW_MASK (1 << ASSIGNED_DEVICE_FORCE_SLOW_BIT) typedef struct AssignedDevice { PCIDevice dev; @@ -111,6 +113,7 @@ typedef struct AssignedDevice { int mmio_index; int need_emulate_cmd; char *configfd_name; + int slots_needed; QLIST_ENTRY(AssignedDevice) next; } AssignedDevice;