From: Alexandru Elisei <alexandru.elisei@arm.com>
To: kvm@vger.kernel.org
Cc: will@kernel.org, julien.thierry.kdev@gmail.com,
	andre.przywara@arm.com, sami.mujawar@arm.com,
	lorenzo.pieralisi@arm.com
Subject: [PATCH v3 kvmtool 22/32] vfio: Destroy memslot when unmapping the associated VAs
Date: Thu, 26 Mar 2020 15:24:28 +0000
Message-ID: <20200326152438.6218-23-alexandru.elisei@arm.com>
In-Reply-To: <20200326152438.6218-1-alexandru.elisei@arm.com>

When we want to map a device region into the guest address space, first we
perform an mmap on the device fd. The resulting VMA is a mapping between
host userspace addresses and physical addresses associated with the device.
Next, we create a memslot, which populates the stage 2 table with the
mappings between guest physical addresses and the device physical addresses.
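
For illustration, below is a minimal sketch of that flow. The helper
name is hypothetical and this is not kvmtool's actual code; error
handling and kvmtool's mem_bank bookkeeping are omitted:

#include <stddef.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helper: map a device region and expose it to the guest. */
static void *map_device_region(int vm_fd, int device_fd, __u32 slot,
			       __u64 guest_phys, __u64 size, __u64 offset)
{
	struct kvm_userspace_memory_region mem;
	void *host_addr;

	/* VMA: host userspace addresses -> device physical addresses. */
	host_addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
			 device_fd, offset);
	if (host_addr == MAP_FAILED)
		return NULL;

	/* Memslot: guest physical addresses -> host userspace addresses,
	 * which KVM uses to populate the stage 2 tables.
	 */
	mem = (struct kvm_userspace_memory_region) {
		.slot			= slot,
		.guest_phys_addr	= guest_phys,
		.memory_size		= size,
		.userspace_addr		= (unsigned long)host_addr,
	};
	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem) < 0) {
		munmap(host_addr, size);
		return NULL;
	}

	return host_addr;
}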

However, when we want to unmap the device from the guest address space, we
only call munmap, which destroys the VMA and the stage 2 mappings, but
doesn't destroy the memslot or kvmtool's internal mem_bank structure
associated with it.

This has been perfectly fine so far, because we only unmap a device region
when we exit kvmtool. This will change when we add support for
reassignable BARs, and we will have to unmap vfio regions as the guest
kernel writes new addresses in the BARs. This can lead to two possible
problems:

- We refuse to create a valid BAR mapping because of a stale mem_bank
  structure which belonged to a previously unmapped region.

- It is possible that the mmap in vfio_map_region returns the same address
  that was previously used to create a memslot, but has since been unmapped
  by vfio_unmap_region. Guest accesses to the device memory will fault
  because the stage 2 mappings are missing, which can lead to performance
  degradation.

Let's do the right thing and destroy the memslot and the mem_bank struct
associated with it when we unmap a vfio region. Set host_addr to NULL after
the munmap call so that, if vfio_unmap_region gets called twice, we don't
unmap an address which the process is now using for something else.
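
For reference, the KVM API semantics this relies on: a memslot is
removed by calling KVM_SET_USER_MEMORY_REGION again for the same slot
with memory_size set to 0. A minimal sketch with a hypothetical helper
name (not kvmtool's actual code):

#include <sys/ioctl.h>
#include <errno.h>
#include <linux/kvm.h>

/* Hypothetical helper: remove a previously created memslot. */
static int delete_memslot(int vm_fd, __u32 slot, __u64 guest_phys,
			  void *userspace_addr)
{
	struct kvm_userspace_memory_region mem = {
		.slot			= slot,
		.guest_phys_addr	= guest_phys,
		.memory_size		= 0,	/* size 0 deletes the slot */
		.userspace_addr		= (unsigned long)userspace_addr,
	};

	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem) < 0)
		return -errno;

	return 0;
}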

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---
 include/kvm/kvm.h |   4 ++
 kvm.c             | 101 ++++++++++++++++++++++++++++++++++++++++------
 vfio/core.c       |   6 +++
 3 files changed, 99 insertions(+), 12 deletions(-)

diff --git a/include/kvm/kvm.h b/include/kvm/kvm.h
index 50119a8672eb..9428f57a1c0c 100644
--- a/include/kvm/kvm.h
+++ b/include/kvm/kvm.h
@@ -1,6 +1,7 @@
 #ifndef KVM__KVM_H
 #define KVM__KVM_H
 
+#include "kvm/mutex.h"
 #include "kvm/kvm-arch.h"
 #include "kvm/kvm-config.h"
 #include "kvm/util-init.h"
@@ -56,6 +57,7 @@ struct kvm_mem_bank {
 	void			*host_addr;
 	u64			size;
 	enum kvm_mem_type	type;
+	u32			slot;
 };
 
 struct kvm {
@@ -72,6 +74,7 @@ struct kvm {
 	u64			ram_size;
 	void			*ram_start;
 	u64			ram_pagesize;
+	struct mutex		mem_banks_lock;
 	struct list_head	mem_banks;
 
 	bool			nmi_disabled;
@@ -106,6 +109,7 @@ void kvm__irq_line(struct kvm *kvm, int irq, int level);
 void kvm__irq_trigger(struct kvm *kvm, int irq);
 bool kvm__emulate_io(struct kvm_cpu *vcpu, u16 port, void *data, int direction, int size, u32 count);
 bool kvm__emulate_mmio(struct kvm_cpu *vcpu, u64 phys_addr, u8 *data, u32 len, u8 is_write);
+int kvm__destroy_mem(struct kvm *kvm, u64 guest_phys, u64 size, void *userspace_addr);
 int kvm__register_mem(struct kvm *kvm, u64 guest_phys, u64 size, void *userspace_addr,
 		      enum kvm_mem_type type);
 static inline int kvm__register_ram(struct kvm *kvm, u64 guest_phys, u64 size,
diff --git a/kvm.c b/kvm.c
index 57c4ff98ec4c..26f6b9bc58a3 100644
--- a/kvm.c
+++ b/kvm.c
@@ -157,6 +157,7 @@ struct kvm *kvm__new(void)
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
+	mutex_init(&kvm->mem_banks_lock);
 	kvm->sys_fd = -1;
 	kvm->vm_fd = -1;
 
@@ -183,20 +184,84 @@ int kvm__exit(struct kvm *kvm)
 }
 core_exit(kvm__exit);
 
+int kvm__destroy_mem(struct kvm *kvm, u64 guest_phys, u64 size,
+		     void *userspace_addr)
+{
+	struct kvm_userspace_memory_region mem;
+	struct kvm_mem_bank *bank;
+	int ret;
+
+	mutex_lock(&kvm->mem_banks_lock);
+	list_for_each_entry(bank, &kvm->mem_banks, list)
+		if (bank->guest_phys_addr == guest_phys &&
+		    bank->size == size && bank->host_addr == userspace_addr)
+			break;
+
+	if (&bank->list == &kvm->mem_banks) {
+		pr_err("Region [%llx-%llx] not found", guest_phys,
+		       guest_phys + size - 1);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (bank->type == KVM_MEM_TYPE_RESERVED) {
+		pr_err("Cannot delete reserved region [%llx-%llx]",
+		       guest_phys, guest_phys + size - 1);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	mem = (struct kvm_userspace_memory_region) {
+		.slot			= bank->slot,
+		.guest_phys_addr	= guest_phys,
+		.memory_size		= 0,
+		.userspace_addr		= (unsigned long)userspace_addr,
+	};
+
+	ret = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
+	if (ret < 0) {
+		ret = -errno;
+		goto out;
+	}
+
+	list_del(&bank->list);
+	free(bank);
+	kvm->mem_slots--;
+	ret = 0;
+
+out:
+	mutex_unlock(&kvm->mem_banks_lock);
+	return ret;
+}
+
 int kvm__register_mem(struct kvm *kvm, u64 guest_phys, u64 size,
 		      void *userspace_addr, enum kvm_mem_type type)
 {
 	struct kvm_userspace_memory_region mem;
 	struct kvm_mem_bank *merged = NULL;
 	struct kvm_mem_bank *bank;
+	struct list_head *prev_entry;
+	u32 slot;
 	int ret;
 
-	/* Check for overlap */
+	mutex_lock(&kvm->mem_banks_lock);
+	/* Check for overlap and find first empty slot. */
+	slot = 0;
+	prev_entry = &kvm->mem_banks;
 	list_for_each_entry(bank, &kvm->mem_banks, list) {
 		u64 bank_end = bank->guest_phys_addr + bank->size - 1;
 		u64 end = guest_phys + size - 1;
-		if (guest_phys > bank_end || end < bank->guest_phys_addr)
+		if (guest_phys > bank_end || end < bank->guest_phys_addr) {
+			/*
+			 * Keep the banks sorted ascending by slot, so it's
+			 * easier for us to find a free slot.
+			 */
+			if (bank->slot == slot) {
+				slot++;
+				prev_entry = &bank->list;
+			}
 			continue;
+		}
 
 		/* Merge overlapping reserved regions */
 		if (bank->type == KVM_MEM_TYPE_RESERVED &&
@@ -226,38 +291,50 @@ int kvm__register_mem(struct kvm *kvm, u64 guest_phys, u64 size,
 		       kvm_mem_type_to_string(bank->type), bank->guest_phys_addr,
 		       bank->guest_phys_addr + bank->size - 1);
 
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
-	if (merged)
-		return 0;
+	if (merged) {
+		ret = 0;
+		goto out;
+	}
 
 	bank = malloc(sizeof(*bank));
-	if (!bank)
-		return -ENOMEM;
+	if (!bank) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	INIT_LIST_HEAD(&bank->list);
 	bank->guest_phys_addr		= guest_phys;
 	bank->host_addr			= userspace_addr;
 	bank->size			= size;
 	bank->type			= type;
+	bank->slot			= slot;
 
 	if (type != KVM_MEM_TYPE_RESERVED) {
 		mem = (struct kvm_userspace_memory_region) {
-			.slot			= kvm->mem_slots++,
+			.slot			= slot,
 			.guest_phys_addr	= guest_phys,
 			.memory_size		= size,
 			.userspace_addr		= (unsigned long)userspace_addr,
 		};
 
 		ret = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
-		if (ret < 0)
-			return -errno;
+		if (ret < 0) {
+			ret = -errno;
+			goto out;
+		}
 	}
 
-	list_add(&bank->list, &kvm->mem_banks);
+	list_add(&bank->list, prev_entry);
+	kvm->mem_slots++;
+	ret = 0;
 
-	return 0;
+out:
+	mutex_unlock(&kvm->mem_banks_lock);
+	return ret;
 }
 
 void *guest_flat_to_host(struct kvm *kvm, u64 offset)
diff --git a/vfio/core.c b/vfio/core.c
index 0ed1e6fee6bf..b80ebf3bb913 100644
--- a/vfio/core.c
+++ b/vfio/core.c
@@ -256,8 +256,14 @@ int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev,
 
 void vfio_unmap_region(struct kvm *kvm, struct vfio_region *region)
 {
+	u64 map_size;
+
 	if (region->host_addr) {
+		map_size = ALIGN(region->info.size, PAGE_SIZE);
+		kvm__destroy_mem(kvm, region->guest_phys_addr, map_size,
+				 region->host_addr);
 		munmap(region->host_addr, region->info.size);
+		region->host_addr = NULL;
 	} else if (region->is_ioport) {
 		ioport__unregister(kvm, region->port_base);
 	} else {
-- 
2.20.1

