All of lore.kernel.org
 help / color / mirror / Atom feed
From: Fernando Carrijo <fcarrijo@yahoo.com.br>
To: Marcelo Tosatti <mtosatti@redhat.com>
Cc: kvm@vger.kernel.org, avi@redhat.com
Subject: Re: [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update
Date: Tue, 22 Sep 2009 07:40:10 -0300	[thread overview]
Message-ID: <1253616010.6210.2.camel@pc-fernando> (raw)
In-Reply-To: <20090921234124.596305294@amt.cnet>

Resending with Cc: added

On Mon, 2009-09-21 at 20:37 -0300, Marcelo Tosatti wrote:

> plain text document attachment (introduce-srcu-and-use-for-slots)
> Use two steps for memslot deletion: mark the slot invalid (which stops 
> instantiation of new shadow pages for that slot, but allows destruction),
> then instantiate the new empty slot.
> 
> Also simplifies kvm_handle_hva locking.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> Index: kvm-slotslock/include/linux/kvm.h
> ===================================================================
> --- kvm-slotslock.orig/include/linux/kvm.h
> +++ kvm-slotslock/include/linux/kvm.h
> @@ -39,7 +39,7 @@ struct kvm_userspace_memory_region {
>  
>  /* for kvm_memory_region::flags */
>  #define KVM_MEM_LOG_DIRTY_PAGES  1UL
> -
> +#define KVM_MEMSLOT_INVALID      (1UL << 1)
>  
>  /* for KVM_IRQ_LINE */
>  struct kvm_irq_level {
> Index: kvm-slotslock/virt/kvm/kvm_main.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/kvm_main.c
> +++ kvm-slotslock/virt/kvm/kvm_main.c
> @@ -43,6 +43,7 @@
>  #include <linux/swap.h>
>  #include <linux/bitops.h>
>  #include <linux/spinlock.h>
> +#include <linux/srcu.h>
>  
>  #include <asm/processor.h>
>  #include <asm/io.h>
> @@ -352,11 +353,15 @@ static struct kvm *kvm_create_vm(void)
>  	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
>  	if (!kvm->memslots)
>  		goto out_err;
> +	if (init_srcu_struct(&kvm->srcu))
> +		goto out_err;
>  
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>  	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> -	if (!page)
> +	if (!page) {
> +		cleanup_srcu_struct(&kvm->srcu);
>  		goto out_err;
> +	}
>  
>  	kvm->coalesced_mmio_ring =
>  			(struct kvm_coalesced_mmio_ring *)page_address(page);
> @@ -367,6 +372,7 @@ static struct kvm *kvm_create_vm(void)
>  		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
>  		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
>  		if (r) {
> +			cleanup_srcu_struct(&kvm->srcu);
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>  			put_page(page);
>  #endif
> @@ -462,6 +468,7 @@ static void kvm_destroy_vm(struct kvm *k
>  	kvm_arch_flush_shadow(kvm);
>  #endif
>  	kvm_arch_destroy_vm(kvm);
> +	cleanup_srcu_struct(&kvm->srcu);
>  	hardware_disable_all();
>  	mmdrop(mm);
>  }
> @@ -502,12 +509,13 @@ int __kvm_set_memory_region(struct kvm *
>  			    struct kvm_userspace_memory_region *mem,
>  			    int user_alloc)
>  {
> -	int r;
> +	int r, flush_shadow = 0;
>  	gfn_t base_gfn;
>  	unsigned long npages;
>  	unsigned long i;
>  	struct kvm_memory_slot *memslot;
>  	struct kvm_memory_slot old, new;
> +	struct kvm_memslots *slots, *old_memslots;
>  
>  	r = -EINVAL;
>  	/* General sanity checks */
> @@ -569,15 +577,7 @@ int __kvm_set_memory_region(struct kvm *
>  		memset(new.rmap, 0, npages * sizeof(*new.rmap));
>  
>  		new.user_alloc = user_alloc;
> -		/*
> -		 * hva_to_rmmap() serialzies with the mmu_lock and to be
> -		 * safe it has to ignore memslots with !user_alloc &&
> -		 * !userspace_addr.
> -		 */
> -		if (user_alloc)
> -			new.userspace_addr = mem->userspace_addr;
> -		else
> -			new.userspace_addr = 0;
> +		new.userspace_addr = mem->userspace_addr;
>  	}
>  	if (!npages)
>  		goto skip_lpage;
> @@ -632,8 +632,9 @@ skip_lpage:
>  		if (!new.dirty_bitmap)
>  			goto out_free;
>  		memset(new.dirty_bitmap, 0, dirty_bytes);
> +		/* destroy any largepage mappings for dirty tracking */
>  		if (old.npages)
> -			kvm_arch_flush_shadow(kvm);
> +			flush_shadow = 1;
>  	}
>  #else  /* not defined CONFIG_S390 */
>  	new.user_alloc = user_alloc;
> @@ -641,34 +642,69 @@ skip_lpage:
>  		new.userspace_addr = mem->userspace_addr;
>  #endif /* not defined CONFIG_S390 */
>  
> -	if (!npages)
> +	if (!npages) {
> +		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +		if (!slots)
> +			goto out_free;
> +		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));

Nothing wrong with the line above, but it makes me wonder whether

                  *slots = *kvm->memslots;

would save us the function-call overhead.

> +		if (mem->slot >= slots->nmemslots)
> +			slots->nmemslots = mem->slot + 1;
> +		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
> +
> +		old_memslots = kvm->memslots;
> +		rcu_assign_pointer(kvm->memslots, slots);
> +		synchronize_srcu(&kvm->srcu);
> +		/* From this point no new shadow pages pointing to a deleted
> +		 * memslot will be created.
> +         	 *
> +         	 * validation of sp->gfn happens in:
> +         	 * 	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
> +         	 * 	- kvm_is_visible_gfn (mmu_check_roots)
> +         	 */
>  		kvm_arch_flush_shadow(kvm);
> +		kfree(old_memslots);
> +	}
>  
>  	r = kvm_arch_prepare_memory_region(kvm, &new, old, user_alloc);
>  	if (r)
>  		goto out_free;
>  
> -	spin_lock(&kvm->mmu_lock);
> -	if (mem->slot >= kvm->memslots->nmemslots)
> -		kvm->memslots->nmemslots = mem->slot + 1;
> +#ifdef CONFIG_DMAR
> +	/* map the pages in iommu page table */
> +	if (npages)
> +		r = kvm_iommu_map_pages(kvm, &new);
> +		if (r)
> +			goto out_free;
> +#endif
>  
> -	*memslot = new;
> -	spin_unlock(&kvm->mmu_lock);
> +	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +	if (!slots)
> +		goto out_free;
> +	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));

Ditto

Cheers,

Fernando Carrijo.

> +	if (mem->slot >= slots->nmemslots)
> +		slots->nmemslots = mem->slot + 1;
> +
> +	/* actual memory is freed via old in kvm_free_physmem_slot below */
> +	if (!npages) {
> +		new.rmap = NULL;
> +		new.dirty_bitmap = NULL;
> +		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
> +			new.lpage_info[i] = NULL;
> +	}
> +
> +	slots->memslots[mem->slot] = new;
> +	old_memslots = kvm->memslots;
> +	rcu_assign_pointer(kvm->memslots, slots);
> +	synchronize_srcu(&kvm->srcu);
>  
>  	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
>  
> -	kvm_free_physmem_slot(&old, npages ? &new : NULL);
> -	/* Slot deletion case: we have to update the current slot */
> -	spin_lock(&kvm->mmu_lock);
> -	if (!npages)
> -		*memslot = old;
> -	spin_unlock(&kvm->mmu_lock);
> -#ifdef CONFIG_DMAR
> -	/* map the pages in iommu page table */
> -	r = kvm_iommu_map_pages(kvm, memslot);
> -	if (r)
> -		goto out;
> -#endif
> +	kvm_free_physmem_slot(&old, &new);
> +	kfree(old_memslots);
> +
> +	if (flush_shadow)
> +		kvm_arch_flush_shadow(kvm);
> +
>  	return 0;
>  
>  out_free:
> @@ -768,7 +804,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
>  struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
>  {
>  	int i;
> -	struct kvm_memslots *slots = kvm->memslots;
> +	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
>  
>  	for (i = 0; i < slots->nmemslots; ++i) {
>  		struct kvm_memory_slot *memslot = &slots->memslots[i];
> @@ -790,12 +826,15 @@ struct kvm_memory_slot *gfn_to_memslot(s
>  int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
>  {
>  	int i;
> -	struct kvm_memslots *slots = kvm->memslots;
> +	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
>  
>  	gfn = unalias_gfn(kvm, gfn);
>  	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
>  		struct kvm_memory_slot *memslot = &slots->memslots[i];
>  
> +		if (memslot->flags & KVM_MEMSLOT_INVALID)
> +			continue;
> +
>  		if (gfn >= memslot->base_gfn
>  		    && gfn < memslot->base_gfn + memslot->npages)
>  			return 1;
> @@ -810,7 +849,7 @@ unsigned long gfn_to_hva(struct kvm *kvm
>  
>  	gfn = unalias_gfn(kvm, gfn);
>  	slot = gfn_to_memslot_unaliased(kvm, gfn);
> -	if (!slot)
> +	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
>  		return bad_hva();
>  	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
>  }
> Index: kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
> ===================================================================
> --- kvm-slotslock.orig/arch/ia64/kvm/kvm-ia64.c
> +++ kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
> @@ -1834,6 +1834,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
>  	struct kvm_memory_slot *memslot;
>  	int is_dirty = 0;
>  
> +	down_write(&kvm->slots_lock);
>  	spin_lock(&kvm->arch.dirty_log_lock);
>  
>  	r = kvm_ia64_sync_dirty_log(kvm, log);
> @@ -1853,6 +1854,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
>  	}
>  	r = 0;
>  out:
> +	up_write(&kvm->slots_lock);
>  	spin_unlock(&kvm->arch.dirty_log_lock);
>  	return r;
>  }
> Index: kvm-slotslock/arch/x86/kvm/mmu.c
> ===================================================================
> --- kvm-slotslock.orig/arch/x86/kvm/mmu.c
> +++ kvm-slotslock/arch/x86/kvm/mmu.c
> @@ -29,6 +29,7 @@
>  #include <linux/swap.h>
>  #include <linux/hugetlb.h>
>  #include <linux/compiler.h>
> +#include <linux/srcu.h>
>  
>  #include <asm/page.h>
>  #include <asm/cmpxchg.h>
> @@ -766,23 +767,18 @@ static int kvm_unmap_rmapp(struct kvm *k
>  static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
>  			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
>  {
> -	int i, j;
> +	int i, j, idx;
>  	int retval = 0;
> -	struct kvm_memslots *slots = kvm->memslots;
> +	struct kvm_memslots *slots;
> +
> +	idx = srcu_read_lock(&kvm->srcu);
> +	slots = rcu_dereference(kvm->memslots);
>  
> -	/*
> -	 * If mmap_sem isn't taken, we can look the memslots with only
> -	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
> -	 */
>  	for (i = 0; i < slots->nmemslots; i++) {
>  		struct kvm_memory_slot *memslot = &slots->memslots[i];
>  		unsigned long start = memslot->userspace_addr;
>  		unsigned long end;
>  
> -		/* mmu_lock protects userspace_addr */
> -		if (!start)
> -			continue;
> -
>  		end = start + (memslot->npages << PAGE_SHIFT);
>  		if (hva >= start && hva < end) {
>  			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
> @@ -797,6 +793,7 @@ static int kvm_handle_hva(struct kvm *kv
>  			}
>  		}
>  	}
> +	srcu_read_unlock(&kvm->srcu, idx);
>  
>  	return retval;
>  }
> @@ -2966,16 +2963,20 @@ nomem:
>   */
>  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
>  {
> -	int i;
> +	int i, idx;
>  	unsigned int nr_mmu_pages;
>  	unsigned int  nr_pages = 0;
> +	struct kvm_memslots *slots;
>  
> -	for (i = 0; i < kvm->memslots->nmemslots; i++)
> -		nr_pages += kvm->memslots->memslots[i].npages;
> +	idx = srcu_read_lock(&kvm->srcu);
> +	slots = rcu_dereference(kvm->memslots);
> +	for (i = 0; i < slots->nmemslots; i++)
> +		nr_pages += slots->memslots[i].npages;
>  
>  	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
>  	nr_mmu_pages = max(nr_mmu_pages,
>  			(unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
> +	srcu_read_unlock(&kvm->srcu, idx);
>  
>  	return nr_mmu_pages;
>  }
> @@ -3241,10 +3242,12 @@ static void audit_mappings(struct kvm_vc
>  static int count_rmaps(struct kvm_vcpu *vcpu)
>  {
>  	int nmaps = 0;
> -	int i, j, k;
> +	int i, j, k, idx;
>  
> +	idx = srcu_read_lock(&kvm->srcu);
> +	slots = rcu_dereference(kvm->memslots);
>  	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
> -		struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i];
> +		struct kvm_memory_slot *m = &slots->memslots[i];
>  		struct kvm_rmap_desc *d;
>  
>  		for (j = 0; j < m->npages; ++j) {
> @@ -3267,6 +3270,7 @@ static int count_rmaps(struct kvm_vcpu *
>  			}
>  		}
>  	}
> +	srcu_read_unlock(&kvm->srcu, idx);
>  	return nmaps;
>  }
>  
> Index: kvm-slotslock/arch/x86/kvm/vmx.c
> ===================================================================
> --- kvm-slotslock.orig/arch/x86/kvm/vmx.c
> +++ kvm-slotslock/arch/x86/kvm/vmx.c
> @@ -24,6 +24,7 @@
>  #include <linux/mm.h>
>  #include <linux/highmem.h>
>  #include <linux/sched.h>
> +#include <linux/srcu.h>
>  #include <linux/moduleparam.h>
>  #include <linux/ftrace_event.h>
>  #include "kvm_cache_regs.h"
> @@ -1465,10 +1466,18 @@ static void enter_pmode(struct kvm_vcpu 
>  static gva_t rmode_tss_base(struct kvm *kvm)
>  {
>  	if (!kvm->arch.tss_addr) {
> -		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
> -				 kvm->memslots->memslots[0].npages - 3;
> +		struct kvm_memslots *slots;
> +		gfn_t base_gfn;
> +		int idx;
> +
> +		idx = srcu_read_lock(&kvm->srcu);
> +		slots = rcu_dereference(kvm->memslots);
> + 		base_gfn = slots->memslots[0].base_gfn +
> +				 slots->memslots[0].npages - 3;
> +		srcu_read_unlock(&kvm->srcu, idx);
>  		return base_gfn << PAGE_SHIFT;
>  	}
> +
>  	return kvm->arch.tss_addr;
>  }
>  
> Index: kvm-slotslock/include/linux/kvm_host.h
> ===================================================================
> --- kvm-slotslock.orig/include/linux/kvm_host.h
> +++ kvm-slotslock/include/linux/kvm_host.h
> @@ -163,6 +163,7 @@ struct kvm {
>  	struct rw_semaphore slots_lock;
>  	struct mm_struct *mm; /* userspace tied to this vm */
>  	struct kvm_memslots *memslots;
> +	struct srcu_struct srcu;
>  #ifdef CONFIG_KVM_APIC_ARCHITECTURE
>  	u32 bsp_vcpu_id;
>  	struct kvm_vcpu *bsp_vcpu;
> Index: kvm-slotslock/virt/kvm/assigned-dev.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/assigned-dev.c
> +++ kvm-slotslock/virt/kvm/assigned-dev.c
> @@ -504,11 +504,11 @@ out:
>  static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
>  				      struct kvm_assigned_pci_dev *assigned_dev)
>  {
> -	int r = 0;
> +	int r = 0, idx;
>  	struct kvm_assigned_dev_kernel *match;
>  	struct pci_dev *dev;
>  
> -	down_read(&kvm->slots_lock);
> +	idx = srcu_read_lock(&kvm->srcu);
>  	mutex_lock(&kvm->lock);
>  
>  	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> @@ -574,7 +574,7 @@ static int kvm_vm_ioctl_assign_device(st
>  
>  out:
>  	mutex_unlock(&kvm->lock);
> -	up_read(&kvm->slots_lock);
> +	srcu_read_unlock(&kvm->srcu, idx);
>  	return r;
>  out_list_del:
>  	list_del(&match->list);
> @@ -586,7 +586,7 @@ out_put:
>  out_free:
>  	kfree(match);
>  	mutex_unlock(&kvm->lock);
> -	up_read(&kvm->slots_lock);
> +	srcu_read_unlock(&kvm->srcu, idx);
>  	return r;
>  }
>  
> Index: kvm-slotslock/virt/kvm/iommu.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/iommu.c
> +++ kvm-slotslock/virt/kvm/iommu.c
> @@ -78,7 +78,7 @@ static int kvm_iommu_map_memslots(struct
>  	int i, r = 0;
>  	struct kvm_memslots *slots;
>  
> -	slots = kvm->memslots;
> +	slots = rcu_dereference(kvm->memslots);
>  
>  	for (i = 0; i < slots->nmemslots; i++) {
>  		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
> @@ -214,7 +214,7 @@ static int kvm_iommu_unmap_memslots(stru
>  	int i;
>  	struct kvm_memslots *slots;
>  
> -	slots = kvm->memslots;
> +	slots = rcu_dereference(kvm->memslots);
>  
>  	for (i = 0; i < slots->nmemslots; i++) {
>  		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


  parent reply	other threads:[~2009-09-22 10:47 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
2009-09-21 23:37 ` [patch 01/10] KVM: modify memslots layout in struct kvm Marcelo Tosatti
2009-09-21 23:37 ` [patch 02/10] KVM: modify alias layout in x86s struct kvm_arch Marcelo Tosatti
2009-09-21 23:37 ` [patch 03/10] KVM: switch dirty_log to mmu_lock protection Marcelo Tosatti
2009-09-22  6:37   ` Avi Kivity
2009-09-22 12:44     ` Marcelo Tosatti
2009-09-22 12:52       ` Avi Kivity
2009-09-21 23:37 ` [patch 04/10] KVM: split kvm_arch_set_memory_region into prepare and commit Marcelo Tosatti
2009-09-22  6:40   ` Avi Kivity
2009-09-21 23:37 ` [patch 05/10] KVM: introduce gfn_to_pfn_memslot Marcelo Tosatti
2009-09-21 23:37 ` [patch 06/10] KVM: use gfn_to_pfn_memslot in kvm_iommu_map_pages Marcelo Tosatti
2009-09-21 23:37 ` [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update Marcelo Tosatti
2009-09-22  6:59   ` Avi Kivity
2009-09-22 16:16     ` Marcelo Tosatti
2009-09-22 10:40   ` Fernando Carrijo [this message]
2009-09-22 12:55     ` Marcelo Tosatti
2009-09-24 14:06   ` Marcelo Tosatti
2009-09-24 17:28     ` Paul E. McKenney
2009-09-24 18:05       ` Marcelo Tosatti
2009-09-25 15:05       ` Avi Kivity
2009-09-21 23:37 ` [patch 08/10] KVM: x86: switch kvm_set_memory_alias " Marcelo Tosatti
2009-09-22  7:04   ` Avi Kivity
2009-09-21 23:37 ` [patch 09/10] KVM: convert io_bus to SRCU Marcelo Tosatti
2009-09-21 23:37 ` [patch 10/10] KVM: switch vcpu context to use SRCU Marcelo Tosatti
2009-09-22  7:07   ` Avi Kivity
2009-09-22  7:09 ` [patch 00/10] RFC: " Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1253616010.6210.2.camel@pc-fernando \
    --to=fcarrijo@yahoo.com.br \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.