From: Fernando Carrijo <fcarrijo@yahoo.com.br>
To: Marcelo Tosatti <mtosatti@redhat.com>
Cc: kvm@vger.kernel.org, avi@redhat.com
Subject: Re: [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update
Date: Tue, 22 Sep 2009 07:40:10 -0300 [thread overview]
Message-ID: <1253616010.6210.2.camel@pc-fernando> (raw)
In-Reply-To: <20090921234124.596305294@amt.cnet>
Resending with Cc: added
On Mon, 2009-09-21 at 20:37 -0300, Marcelo Tosatti wrote:
> plain text document attachment (introduce-srcu-and-use-for-slots)
> Use two steps for memslot deletion: mark the slot invalid (which stops
> instantiation of new shadow pages for that slot, but allows destruction),
> then instantiate the new empty slot.
>
> Also simplifies kvm_handle_hva locking.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: kvm-slotslock/include/linux/kvm.h
> ===================================================================
> --- kvm-slotslock.orig/include/linux/kvm.h
> +++ kvm-slotslock/include/linux/kvm.h
> @@ -39,7 +39,7 @@ struct kvm_userspace_memory_region {
>
> /* for kvm_memory_region::flags */
> #define KVM_MEM_LOG_DIRTY_PAGES 1UL
> -
> +#define KVM_MEMSLOT_INVALID (1UL << 1)
>
> /* for KVM_IRQ_LINE */
> struct kvm_irq_level {
> Index: kvm-slotslock/virt/kvm/kvm_main.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/kvm_main.c
> +++ kvm-slotslock/virt/kvm/kvm_main.c
> @@ -43,6 +43,7 @@
> #include <linux/swap.h>
> #include <linux/bitops.h>
> #include <linux/spinlock.h>
> +#include <linux/srcu.h>
>
> #include <asm/processor.h>
> #include <asm/io.h>
> @@ -352,11 +353,15 @@ static struct kvm *kvm_create_vm(void)
> kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> if (!kvm->memslots)
> goto out_err;
> + if (init_srcu_struct(&kvm->srcu))
> + goto out_err;
>
> #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
> page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> - if (!page)
> + if (!page) {
> + cleanup_srcu_struct(&kvm->srcu);
> goto out_err;
> + }
>
> kvm->coalesced_mmio_ring =
> (struct kvm_coalesced_mmio_ring *)page_address(page);
> @@ -367,6 +372,7 @@ static struct kvm *kvm_create_vm(void)
> kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
> r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
> if (r) {
> + cleanup_srcu_struct(&kvm->srcu);
> #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
> put_page(page);
> #endif
> @@ -462,6 +468,7 @@ static void kvm_destroy_vm(struct kvm *k
> kvm_arch_flush_shadow(kvm);
> #endif
> kvm_arch_destroy_vm(kvm);
> + cleanup_srcu_struct(&kvm->srcu);
> hardware_disable_all();
> mmdrop(mm);
> }
> @@ -502,12 +509,13 @@ int __kvm_set_memory_region(struct kvm *
> struct kvm_userspace_memory_region *mem,
> int user_alloc)
> {
> - int r;
> + int r, flush_shadow = 0;
> gfn_t base_gfn;
> unsigned long npages;
> unsigned long i;
> struct kvm_memory_slot *memslot;
> struct kvm_memory_slot old, new;
> + struct kvm_memslots *slots, *old_memslots;
>
> r = -EINVAL;
> /* General sanity checks */
> @@ -569,15 +577,7 @@ int __kvm_set_memory_region(struct kvm *
> memset(new.rmap, 0, npages * sizeof(*new.rmap));
>
> new.user_alloc = user_alloc;
> - /*
> - * hva_to_rmmap() serialzies with the mmu_lock and to be
> - * safe it has to ignore memslots with !user_alloc &&
> - * !userspace_addr.
> - */
> - if (user_alloc)
> - new.userspace_addr = mem->userspace_addr;
> - else
> - new.userspace_addr = 0;
> + new.userspace_addr = mem->userspace_addr;
> }
> if (!npages)
> goto skip_lpage;
> @@ -632,8 +632,9 @@ skip_lpage:
> if (!new.dirty_bitmap)
> goto out_free;
> memset(new.dirty_bitmap, 0, dirty_bytes);
> + /* destroy any largepage mappings for dirty tracking */
> if (old.npages)
> - kvm_arch_flush_shadow(kvm);
> + flush_shadow = 1;
> }
> #else /* not defined CONFIG_S390 */
> new.user_alloc = user_alloc;
> @@ -641,34 +642,69 @@ skip_lpage:
> new.userspace_addr = mem->userspace_addr;
> #endif /* not defined CONFIG_S390 */
>
> - if (!npages)
> + if (!npages) {
> + slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> + if (!slots)
> + goto out_free;
> + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
Nothing wrong with the above line, but it makes me think if
*slots = *kvm->memslots;
would save us the function call overhead
> + if (mem->slot >= slots->nmemslots)
> + slots->nmemslots = mem->slot + 1;
> + slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
> +
> + old_memslots = kvm->memslots;
> + rcu_assign_pointer(kvm->memslots, slots);
> + synchronize_srcu(&kvm->srcu);
> + /* From this point no new shadow pages pointing to a deleted
> + * memslot will be created.
> + *
> + * validation of sp->gfn happens in:
> + * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
> + * - kvm_is_visible_gfn (mmu_check_roots)
> + */
> kvm_arch_flush_shadow(kvm);
> + kfree(old_memslots);
> + }
>
> r = kvm_arch_prepare_memory_region(kvm, &new, old, user_alloc);
> if (r)
> goto out_free;
>
> - spin_lock(&kvm->mmu_lock);
> - if (mem->slot >= kvm->memslots->nmemslots)
> - kvm->memslots->nmemslots = mem->slot + 1;
> +#ifdef CONFIG_DMAR
> + /* map the pages in iommu page table */
> + if (npages)
> + r = kvm_iommu_map_pages(kvm, &new);
> + if (r)
> + goto out_free;
> +#endif
>
> - *memslot = new;
> - spin_unlock(&kvm->mmu_lock);
> + slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> + if (!slots)
> + goto out_free;
> + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
Ditto
Cheers,
Fernando Carrijo.
> + if (mem->slot >= slots->nmemslots)
> + slots->nmemslots = mem->slot + 1;
> +
> + /* actual memory is freed via old in kvm_free_physmem_slot below */
> + if (!npages) {
> + new.rmap = NULL;
> + new.dirty_bitmap = NULL;
> + for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
> + new.lpage_info[i] = NULL;
> + }
> +
> + slots->memslots[mem->slot] = new;
> + old_memslots = kvm->memslots;
> + rcu_assign_pointer(kvm->memslots, slots);
> + synchronize_srcu(&kvm->srcu);
>
> kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
>
> - kvm_free_physmem_slot(&old, npages ? &new : NULL);
> - /* Slot deletion case: we have to update the current slot */
> - spin_lock(&kvm->mmu_lock);
> - if (!npages)
> - *memslot = old;
> - spin_unlock(&kvm->mmu_lock);
> -#ifdef CONFIG_DMAR
> - /* map the pages in iommu page table */
> - r = kvm_iommu_map_pages(kvm, memslot);
> - if (r)
> - goto out;
> -#endif
> + kvm_free_physmem_slot(&old, &new);
> + kfree(old_memslots);
> +
> + if (flush_shadow)
> + kvm_arch_flush_shadow(kvm);
> +
> return 0;
>
> out_free:
> @@ -768,7 +804,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
> struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
> {
> int i;
> - struct kvm_memslots *slots = kvm->memslots;
> + struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
>
> for (i = 0; i < slots->nmemslots; ++i) {
> struct kvm_memory_slot *memslot = &slots->memslots[i];
> @@ -790,12 +826,15 @@ struct kvm_memory_slot *gfn_to_memslot(s
> int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
> {
> int i;
> - struct kvm_memslots *slots = kvm->memslots;
> + struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
>
> gfn = unalias_gfn(kvm, gfn);
> for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
> struct kvm_memory_slot *memslot = &slots->memslots[i];
>
> + if (memslot->flags & KVM_MEMSLOT_INVALID)
> + continue;
> +
> if (gfn >= memslot->base_gfn
> && gfn < memslot->base_gfn + memslot->npages)
> return 1;
> @@ -810,7 +849,7 @@ unsigned long gfn_to_hva(struct kvm *kvm
>
> gfn = unalias_gfn(kvm, gfn);
> slot = gfn_to_memslot_unaliased(kvm, gfn);
> - if (!slot)
> + if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
> return bad_hva();
> return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
> }
> Index: kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
> ===================================================================
> --- kvm-slotslock.orig/arch/ia64/kvm/kvm-ia64.c
> +++ kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
> @@ -1834,6 +1834,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
> struct kvm_memory_slot *memslot;
> int is_dirty = 0;
>
> + down_write(&kvm->slots_lock);
> spin_lock(&kvm->arch.dirty_log_lock);
>
> r = kvm_ia64_sync_dirty_log(kvm, log);
> @@ -1853,6 +1854,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
> }
> r = 0;
> out:
> + up_write(&kvm->slots_lock);
> spin_unlock(&kvm->arch.dirty_log_lock);
> return r;
> }
> Index: kvm-slotslock/arch/x86/kvm/mmu.c
> ===================================================================
> --- kvm-slotslock.orig/arch/x86/kvm/mmu.c
> +++ kvm-slotslock/arch/x86/kvm/mmu.c
> @@ -29,6 +29,7 @@
> #include <linux/swap.h>
> #include <linux/hugetlb.h>
> #include <linux/compiler.h>
> +#include <linux/srcu.h>
>
> #include <asm/page.h>
> #include <asm/cmpxchg.h>
> @@ -766,23 +767,18 @@ static int kvm_unmap_rmapp(struct kvm *k
> static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
> int (*handler)(struct kvm *kvm, unsigned long *rmapp))
> {
> - int i, j;
> + int i, j, idx;
> int retval = 0;
> - struct kvm_memslots *slots = kvm->memslots;
> + struct kvm_memslots *slots;
> +
> + idx = srcu_read_lock(&kvm->srcu);
> + slots = rcu_dereference(kvm->memslots);
>
> - /*
> - * If mmap_sem isn't taken, we can look the memslots with only
> - * the mmu_lock by skipping over the slots with userspace_addr == 0.
> - */
> for (i = 0; i < slots->nmemslots; i++) {
> struct kvm_memory_slot *memslot = &slots->memslots[i];
> unsigned long start = memslot->userspace_addr;
> unsigned long end;
>
> - /* mmu_lock protects userspace_addr */
> - if (!start)
> - continue;
> -
> end = start + (memslot->npages << PAGE_SHIFT);
> if (hva >= start && hva < end) {
> gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
> @@ -797,6 +793,7 @@ static int kvm_handle_hva(struct kvm *kv
> }
> }
> }
> + srcu_read_unlock(&kvm->srcu, idx);
>
> return retval;
> }
> @@ -2966,16 +2963,20 @@ nomem:
> */
> unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
> {
> - int i;
> + int i, idx;
> unsigned int nr_mmu_pages;
> unsigned int nr_pages = 0;
> + struct kvm_memslots *slots;
>
> - for (i = 0; i < kvm->memslots->nmemslots; i++)
> - nr_pages += kvm->memslots->memslots[i].npages;
> + idx = srcu_read_lock(&kvm->srcu);
> + slots = rcu_dereference(kvm->memslots);
> + for (i = 0; i < slots->nmemslots; i++)
> + nr_pages += slots->memslots[i].npages;
>
> nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
> nr_mmu_pages = max(nr_mmu_pages,
> (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
> + srcu_read_unlock(&kvm->srcu, idx);
>
> return nr_mmu_pages;
> }
> @@ -3241,10 +3242,12 @@ static void audit_mappings(struct kvm_vc
> static int count_rmaps(struct kvm_vcpu *vcpu)
> {
> int nmaps = 0;
> - int i, j, k;
> + int i, j, k, idx;
>
> + idx = srcu_read_lock(&kvm->srcu);
> + slots = rcu_dereference(kvm->memslots);
> for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
> - struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i];
> + struct kvm_memory_slot *m = &slots->memslots[i];
> struct kvm_rmap_desc *d;
>
> for (j = 0; j < m->npages; ++j) {
> @@ -3267,6 +3270,7 @@ static int count_rmaps(struct kvm_vcpu *
> }
> }
> }
> + srcu_read_unlock(&kvm->srcu, idx);
> return nmaps;
> }
>
> Index: kvm-slotslock/arch/x86/kvm/vmx.c
> ===================================================================
> --- kvm-slotslock.orig/arch/x86/kvm/vmx.c
> +++ kvm-slotslock/arch/x86/kvm/vmx.c
> @@ -24,6 +24,7 @@
> #include <linux/mm.h>
> #include <linux/highmem.h>
> #include <linux/sched.h>
> +#include <linux/srcu.h>
> #include <linux/moduleparam.h>
> #include <linux/ftrace_event.h>
> #include "kvm_cache_regs.h"
> @@ -1465,10 +1466,18 @@ static void enter_pmode(struct kvm_vcpu
> static gva_t rmode_tss_base(struct kvm *kvm)
> {
> if (!kvm->arch.tss_addr) {
> - gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
> - kvm->memslots->memslots[0].npages - 3;
> + struct kvm_memslots *slots;
> + gfn_t base_gfn;
> + int idx;
> +
> + idx = srcu_read_lock(&kvm->srcu);
> + slots = rcu_dereference(kvm->memslots);
> + base_gfn = slots->memslots[0].base_gfn +
> + slots->memslots[0].npages - 3;
> + srcu_read_unlock(&kvm->srcu, idx);
> return base_gfn << PAGE_SHIFT;
> }
> +
> return kvm->arch.tss_addr;
> }
>
> Index: kvm-slotslock/include/linux/kvm_host.h
> ===================================================================
> --- kvm-slotslock.orig/include/linux/kvm_host.h
> +++ kvm-slotslock/include/linux/kvm_host.h
> @@ -163,6 +163,7 @@ struct kvm {
> struct rw_semaphore slots_lock;
> struct mm_struct *mm; /* userspace tied to this vm */
> struct kvm_memslots *memslots;
> + struct srcu_struct srcu;
> #ifdef CONFIG_KVM_APIC_ARCHITECTURE
> u32 bsp_vcpu_id;
> struct kvm_vcpu *bsp_vcpu;
> Index: kvm-slotslock/virt/kvm/assigned-dev.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/assigned-dev.c
> +++ kvm-slotslock/virt/kvm/assigned-dev.c
> @@ -504,11 +504,11 @@ out:
> static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
> struct kvm_assigned_pci_dev *assigned_dev)
> {
> - int r = 0;
> + int r = 0, idx;
> struct kvm_assigned_dev_kernel *match;
> struct pci_dev *dev;
>
> - down_read(&kvm->slots_lock);
> + idx = srcu_read_lock(&kvm->srcu);
> mutex_lock(&kvm->lock);
>
> match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> @@ -574,7 +574,7 @@ static int kvm_vm_ioctl_assign_device(st
>
> out:
> mutex_unlock(&kvm->lock);
> - up_read(&kvm->slots_lock);
> + srcu_read_unlock(&kvm->srcu, idx);
> return r;
> out_list_del:
> list_del(&match->list);
> @@ -586,7 +586,7 @@ out_put:
> out_free:
> kfree(match);
> mutex_unlock(&kvm->lock);
> - up_read(&kvm->slots_lock);
> + srcu_read_unlock(&kvm->srcu, idx);
> return r;
> }
>
> Index: kvm-slotslock/virt/kvm/iommu.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/iommu.c
> +++ kvm-slotslock/virt/kvm/iommu.c
> @@ -78,7 +78,7 @@ static int kvm_iommu_map_memslots(struct
> int i, r = 0;
> struct kvm_memslots *slots;
>
> - slots = kvm->memslots;
> + slots = rcu_dereference(kvm->memslots);
>
> for (i = 0; i < slots->nmemslots; i++) {
> r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
> @@ -214,7 +214,7 @@ static int kvm_iommu_unmap_memslots(stru
> int i;
> struct kvm_memslots *slots;
>
> - slots = kvm->memslots;
> + slots = rcu_dereference(kvm->memslots);
>
> for (i = 0; i < slots->nmemslots; i++) {
> kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2009-09-22 10:47 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
2009-09-21 23:37 ` [patch 01/10] KVM: modify memslots layout in struct kvm Marcelo Tosatti
2009-09-21 23:37 ` [patch 02/10] KVM: modify alias layout in x86s struct kvm_arch Marcelo Tosatti
2009-09-21 23:37 ` [patch 03/10] KVM: switch dirty_log to mmu_lock protection Marcelo Tosatti
2009-09-22 6:37 ` Avi Kivity
2009-09-22 12:44 ` Marcelo Tosatti
2009-09-22 12:52 ` Avi Kivity
2009-09-21 23:37 ` [patch 04/10] KVM: split kvm_arch_set_memory_region into prepare and commit Marcelo Tosatti
2009-09-22 6:40 ` Avi Kivity
2009-09-21 23:37 ` [patch 05/10] KVM: introduce gfn_to_pfn_memslot Marcelo Tosatti
2009-09-21 23:37 ` [patch 06/10] KVM: use gfn_to_pfn_memslot in kvm_iommu_map_pages Marcelo Tosatti
2009-09-21 23:37 ` [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update Marcelo Tosatti
2009-09-22 6:59 ` Avi Kivity
2009-09-22 16:16 ` Marcelo Tosatti
2009-09-22 10:40 ` Fernando Carrijo [this message]
2009-09-22 12:55 ` Marcelo Tosatti
2009-09-24 14:06 ` Marcelo Tosatti
2009-09-24 17:28 ` Paul E. McKenney
2009-09-24 18:05 ` Marcelo Tosatti
2009-09-25 15:05 ` Avi Kivity
2009-09-21 23:37 ` [patch 08/10] KVM: x86: switch kvm_set_memory_alias " Marcelo Tosatti
2009-09-22 7:04 ` Avi Kivity
2009-09-21 23:37 ` [patch 09/10] KVM: convert io_bus to SRCU Marcelo Tosatti
2009-09-21 23:37 ` [patch 10/10] KVM: switch vcpu context to use SRCU Marcelo Tosatti
2009-09-22 7:07 ` Avi Kivity
2009-09-22 7:09 ` [patch 00/10] RFC: " Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1253616010.6210.2.camel@pc-fernando \
--to=fcarrijo@yahoo.com.br \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=mtosatti@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).