kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 00/10] RFC: switch vcpu context to use SRCU
@ 2009-09-21 23:37 Marcelo Tosatti
  2009-09-21 23:37 ` [patch 01/10] KVM: modify memslots layout in struct kvm Marcelo Tosatti
                   ` (10 more replies)
  0 siblings, 11 replies; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-21 23:37 UTC (permalink / raw)
  To: kvm; +Cc: avi

Improves vmexit latency by ~= 13% on UP guest.

Still requires decent stress testing and careful review, but posting for
early comments.



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 01/10] KVM: modify memslots layout in struct kvm
  2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
@ 2009-09-21 23:37 ` Marcelo Tosatti
  2009-09-21 23:37 ` [patch 02/10] KVM: modify alias layout in x86s struct kvm_arch Marcelo Tosatti
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-21 23:37 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: modify-memslot-layout --]
[-- Type: text/plain, Size: 8597 bytes --]

Have a pointer to an allocated region inside struct kvm.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-slotslock/arch/x86/kvm/mmu.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/mmu.c
+++ kvm-slotslock/arch/x86/kvm/mmu.c
@@ -768,13 +768,14 @@ static int kvm_handle_hva(struct kvm *kv
 {
 	int i, j;
 	int retval = 0;
+	struct kvm_memslots *slots = kvm->memslots;
 
 	/*
 	 * If mmap_sem isn't taken, we can look the memslots with only
 	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
 	 */
-	for (i = 0; i < kvm->nmemslots; i++) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+	for (i = 0; i < slots->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
 		unsigned long start = memslot->userspace_addr;
 		unsigned long end;
 
@@ -2969,8 +2970,8 @@ unsigned int kvm_mmu_calculate_mmu_pages
 	unsigned int nr_mmu_pages;
 	unsigned int  nr_pages = 0;
 
-	for (i = 0; i < kvm->nmemslots; i++)
-		nr_pages += kvm->memslots[i].npages;
+	for (i = 0; i < kvm->memslots->nmemslots; i++)
+		nr_pages += kvm->memslots->memslots[i].npages;
 
 	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
 	nr_mmu_pages = max(nr_mmu_pages,
@@ -3243,7 +3244,7 @@ static int count_rmaps(struct kvm_vcpu *
 	int i, j, k;
 
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
+		struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i];
 		struct kvm_rmap_desc *d;
 
 		for (j = 0; j < m->npages; ++j) {
Index: kvm-slotslock/arch/x86/kvm/vmx.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/vmx.c
+++ kvm-slotslock/arch/x86/kvm/vmx.c
@@ -1465,8 +1465,8 @@ static void enter_pmode(struct kvm_vcpu 
 static gva_t rmode_tss_base(struct kvm *kvm)
 {
 	if (!kvm->arch.tss_addr) {
-		gfn_t base_gfn = kvm->memslots[0].base_gfn +
-				 kvm->memslots[0].npages - 3;
+		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
+				 kvm->memslots->memslots[0].npages - 3;
 		return base_gfn << PAGE_SHIFT;
 	}
 	return kvm->arch.tss_addr;
Index: kvm-slotslock/arch/x86/kvm/x86.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/x86.c
+++ kvm-slotslock/arch/x86/kvm/x86.c
@@ -2159,7 +2159,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
 		spin_lock(&kvm->mmu_lock);
 		kvm_mmu_slot_remove_write_access(kvm, log->slot);
 		spin_unlock(&kvm->mmu_lock);
-		memslot = &kvm->memslots[log->slot];
+		memslot = &kvm->memslots->memslots[log->slot];
 		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
 		memset(memslot->dirty_bitmap, 0, n);
 	}
@@ -4845,7 +4845,7 @@ int kvm_arch_set_memory_region(struct kv
 				int user_alloc)
 {
 	int npages = mem->memory_size >> PAGE_SHIFT;
-	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+	struct kvm_memory_slot *memslot = &kvm->memslots->memslots[mem->slot];
 
 	/*To keep backward compatibility with older userspace,
 	 *x86 needs to hanlde !user_alloc case.
Index: kvm-slotslock/include/linux/kvm_host.h
===================================================================
--- kvm-slotslock.orig/include/linux/kvm_host.h
+++ kvm-slotslock/include/linux/kvm_host.h
@@ -151,14 +151,18 @@ struct kvm_irq_routing_table {};
 
 #endif
 
+struct kvm_memslots {
+	int nmemslots;
+	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
+					KVM_PRIVATE_MEM_SLOTS];
+};
+
 struct kvm {
 	spinlock_t mmu_lock;
 	spinlock_t requests_lock;
 	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
-	int nmemslots;
-	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
-					KVM_PRIVATE_MEM_SLOTS];
+	struct kvm_memslots *memslots;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 	struct kvm_vcpu *bsp_vcpu;
@@ -482,7 +486,7 @@ static inline void kvm_guest_exit(void)
 
 static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
-	return slot - kvm->memslots;
+	return slot - kvm->memslots->memslots;
 }
 
 static inline gpa_t gfn_to_gpa(gfn_t gfn)
Index: kvm-slotslock/virt/kvm/iommu.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/iommu.c
+++ kvm-slotslock/virt/kvm/iommu.c
@@ -76,10 +76,13 @@ unmap_pages:
 static int kvm_iommu_map_memslots(struct kvm *kvm)
 {
 	int i, r = 0;
+	struct kvm_memslots *slots;
 
-	for (i = 0; i < kvm->nmemslots; i++) {
-		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
-					kvm->memslots[i].npages);
+	slots = kvm->memslots;
+
+	for (i = 0; i < slots->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, slots->memslots[i].base_gfn,
+					slots->memslots[i].npages);
 		if (r)
 			break;
 	}
@@ -210,10 +213,13 @@ static void kvm_iommu_put_pages(struct k
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 {
 	int i;
+	struct kvm_memslots *slots;
+
+	slots = kvm->memslots;
 
-	for (i = 0; i < kvm->nmemslots; i++) {
-		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
-				    kvm->memslots[i].npages);
+	for (i = 0; i < slots->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
+				    slots->memslots[i].npages);
 	}
 
 	return 0;
Index: kvm-slotslock/virt/kvm/kvm_main.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/kvm_main.c
+++ kvm-slotslock/virt/kvm/kvm_main.c
@@ -348,12 +348,16 @@ static struct kvm *kvm_create_vm(void)
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 
+	r = -ENOMEM;
+	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!kvm->memslots)
+		goto out_err;
+
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!page) {
-		r = -ENOMEM;
+	if (!page)
 		goto out_err;
-	}
+
 	kvm->coalesced_mmio_ring =
 			(struct kvm_coalesced_mmio_ring *)page_address(page);
 #endif
@@ -394,6 +398,7 @@ out:
 out_err:
 	hardware_disable_all();
 out_err_nodisable:
+	kfree(kvm->memslots);
 	kfree(kvm);
 	return ERR_PTR(r);
 }
@@ -428,9 +433,12 @@ static void kvm_free_physmem_slot(struct
 void kvm_free_physmem(struct kvm *kvm)
 {
 	int i;
+	struct kvm_memslots *slots = kvm->memslots;
+
+	for (i = 0; i < slots->nmemslots; ++i)
+		kvm_free_physmem_slot(&slots->memslots[i], NULL);
 
-	for (i = 0; i < kvm->nmemslots; ++i)
-		kvm_free_physmem_slot(&kvm->memslots[i], NULL);
+	kfree(kvm->memslots);
 }
 
 static void kvm_destroy_vm(struct kvm *kvm)
@@ -514,7 +522,7 @@ int __kvm_set_memory_region(struct kvm *
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
 		goto out;
 
-	memslot = &kvm->memslots[mem->slot];
+	memslot = &kvm->memslots->memslots[mem->slot];
 	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	npages = mem->memory_size >> PAGE_SHIFT;
 
@@ -535,7 +543,7 @@ int __kvm_set_memory_region(struct kvm *
 	/* Check for overlaps */
 	r = -EEXIST;
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *s = &kvm->memslots[i];
+		struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
 
 		if (s == memslot || !s->npages)
 			continue;
@@ -637,8 +645,8 @@ skip_lpage:
 		kvm_arch_flush_shadow(kvm);
 
 	spin_lock(&kvm->mmu_lock);
-	if (mem->slot >= kvm->nmemslots)
-		kvm->nmemslots = mem->slot + 1;
+	if (mem->slot >= kvm->memslots->nmemslots)
+		kvm->memslots->nmemslots = mem->slot + 1;
 
 	*memslot = new;
 	spin_unlock(&kvm->mmu_lock);
@@ -708,7 +716,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots[log->slot];
+	memslot = &kvm->memslots->memslots[log->slot];
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
@@ -762,9 +770,10 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
+	struct kvm_memslots *slots = kvm->memslots;
 
-	for (i = 0; i < kvm->nmemslots; ++i) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+	for (i = 0; i < slots->nmemslots; ++i) {
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
@@ -783,10 +792,11 @@ struct kvm_memory_slot *gfn_to_memslot(s
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
+	struct kvm_memslots *slots = kvm->memslots;
 
 	gfn = unalias_gfn(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 02/10] KVM: modify alias layout in x86s struct kvm_arch
  2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
  2009-09-21 23:37 ` [patch 01/10] KVM: modify memslots layout in struct kvm Marcelo Tosatti
@ 2009-09-21 23:37 ` Marcelo Tosatti
  2009-09-21 23:37 ` [patch 03/10] KVM: switch dirty_log to mmu_lock protection Marcelo Tosatti
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-21 23:37 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: modify-alias-layout --]
[-- Type: text/plain, Size: 2726 bytes --]

Have a pointer to an allocated region inside x86's kvm_arch.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-slotslock/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm-slotslock.orig/arch/x86/include/asm/kvm_host.h
+++ kvm-slotslock/arch/x86/include/asm/kvm_host.h
@@ -379,9 +379,13 @@ struct kvm_mem_alias {
 	gfn_t target_gfn;
 };
 
-struct kvm_arch{
-	int naliases;
+struct kvm_mem_aliases {
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
+	int naliases;
+};
+
+struct kvm_arch {
+	struct kvm_mem_aliases *aliases;
 
 	unsigned int n_free_mmu_pages;
 	unsigned int n_requested_mmu_pages;
Index: kvm-slotslock/arch/x86/kvm/x86.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/x86.c
+++ kvm-slotslock/arch/x86/kvm/x86.c
@@ -1959,9 +1959,10 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t
 {
 	int i;
 	struct kvm_mem_alias *alias;
+	struct kvm_mem_aliases *aliases = kvm->arch.aliases;
 
-	for (i = 0; i < kvm->arch.naliases; ++i) {
-		alias = &kvm->arch.aliases[i];
+	for (i = 0; i < aliases->naliases; ++i) {
+		alias = &aliases->aliases[i];
 		if (gfn >= alias->base_gfn
 		    && gfn < alias->base_gfn + alias->npages)
 			return alias->target_gfn + gfn - alias->base_gfn;
@@ -1979,6 +1980,7 @@ static int kvm_vm_ioctl_set_memory_alias
 {
 	int r, n;
 	struct kvm_mem_alias *p;
+	struct kvm_mem_aliases *aliases;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -1998,15 +2000,17 @@ static int kvm_vm_ioctl_set_memory_alias
 	down_write(&kvm->slots_lock);
 	spin_lock(&kvm->mmu_lock);
 
-	p = &kvm->arch.aliases[alias->slot];
+	aliases = kvm->arch.aliases;
+
+	p = &aliases->aliases[alias->slot];
 	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
 	p->npages = alias->memory_size >> PAGE_SHIFT;
 	p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
 
 	for (n = KVM_ALIAS_SLOTS; n > 0; --n)
-		if (kvm->arch.aliases[n - 1].npages)
+		if (aliases->aliases[n - 1].npages)
 			break;
-	kvm->arch.naliases = n;
+	aliases->naliases = n;
 
 	spin_unlock(&kvm->mmu_lock);
 	kvm_mmu_zap_all(kvm);
@@ -4780,6 +4784,12 @@ struct  kvm *kvm_arch_create_vm(void)
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
+	kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
+	if (!kvm->arch.aliases) {
+		kfree(kvm);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
@@ -4836,6 +4846,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm
 		put_page(kvm->arch.apic_access_page);
 	if (kvm->arch.ept_identity_pagetable)
 		put_page(kvm->arch.ept_identity_pagetable);
+	kfree(kvm->arch.aliases);
 	kfree(kvm);
 }
 



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 03/10] KVM: switch dirty_log to mmu_lock protection
  2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
  2009-09-21 23:37 ` [patch 01/10] KVM: modify memslots layout in struct kvm Marcelo Tosatti
  2009-09-21 23:37 ` [patch 02/10] KVM: modify alias layout in x86s struct kvm_arch Marcelo Tosatti
@ 2009-09-21 23:37 ` Marcelo Tosatti
  2009-09-22  6:37   ` Avi Kivity
  2009-09-21 23:37 ` [patch 04/10] KVM: split kvm_arch_set_memory_region into prepare and commit Marcelo Tosatti
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-21 23:37 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: get-dirty-log --]
[-- Type: text/plain, Size: 3795 bytes --]

get_dirty_log vs mark_page_dirty need to be mutually exclusive. Switch
to mmu_lock protection.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-slotslock/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm-slotslock/arch/x86/kvm/paging_tmpl.h
@@ -175,7 +175,9 @@ walk:
 		if (!(pte & PT_ACCESSED_MASK)) {
 			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
 						       sizeof(pte));
+			spin_lock(&vcpu->kvm->mmu_lock);
 			mark_page_dirty(vcpu->kvm, table_gfn);
+			spin_unlock(&vcpu->kvm->mmu_lock);
 			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
 			    index, pte, pte|PT_ACCESSED_MASK))
 				goto walk;
@@ -215,7 +217,9 @@ walk:
 		bool ret;
 
 		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
+		spin_lock(&vcpu->kvm->mmu_lock);
 		mark_page_dirty(vcpu->kvm, table_gfn);
+		spin_unlock(&vcpu->kvm->mmu_lock);
 		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
 			    pte|PT_DIRTY_MASK);
 		if (ret)
Index: kvm-slotslock/arch/x86/kvm/x86.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/x86.c
+++ kvm-slotslock/arch/x86/kvm/x86.c
@@ -692,7 +692,9 @@ static void kvm_write_guest_time(struct 
 
 	kunmap_atomic(shared_kaddr, KM_USER0);
 
+	spin_lock(&v->kvm->mmu_lock);
 	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+	spin_unlock(&v->kvm->mmu_lock);
 }
 
 static int kvm_request_guest_time_update(struct kvm_vcpu *v)
@@ -2147,27 +2149,45 @@ static int kvm_vm_ioctl_reinject(struct 
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				      struct kvm_dirty_log *log)
 {
-	int r;
-	int n;
+	int r, n, i;
 	struct kvm_memory_slot *memslot;
-	int is_dirty = 0;
+	unsigned long is_dirty = 0;
+	unsigned long *dirty_bitmap;
 
 	down_write(&kvm->slots_lock);
 
-	r = kvm_get_dirty_log(kvm, log, &is_dirty);
-	if (r)
+	r = -EINVAL;
+	if (log->slot >= KVM_MEMORY_SLOTS)
+		goto out;
+
+	memslot = &kvm->memslots->memslots[log->slot];
+	r = -ENOENT;
+	if (!memslot->dirty_bitmap)
+		goto out;
+
+	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+	r = -ENOMEM;
+	dirty_bitmap = vmalloc(n);
+	if (!dirty_bitmap)
 		goto out;
+	memset(dirty_bitmap, 0, n);
+
+	spin_lock(&kvm->mmu_lock);
+	for (i = 0; !is_dirty && i < n/sizeof(long); ++i)
+		is_dirty = memslot->dirty_bitmap[i];
 
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
-		spin_lock(&kvm->mmu_lock);
+		memcpy(dirty_bitmap, memslot->dirty_bitmap, n);
+		memset(memslot->dirty_bitmap, 0, n);
 		kvm_mmu_slot_remove_write_access(kvm, log->slot);
-		spin_unlock(&kvm->mmu_lock);
 		memslot = &kvm->memslots->memslots[log->slot];
-		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
-		memset(memslot->dirty_bitmap, 0, n);
 	}
+	spin_unlock(&kvm->mmu_lock);
+
 	r = 0;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
+		r = -EFAULT;
 out:
 	up_write(&kvm->slots_lock);
 	return r;
@@ -3491,7 +3511,9 @@ static void vapic_exit(struct kvm_vcpu *
 
 	down_read(&vcpu->kvm->slots_lock);
 	kvm_release_page_dirty(apic->vapic_page);
+	spin_lock(&vcpu->kvm->mmu_lock);
 	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
+	spin_unlock(&vcpu->kvm->mmu_lock);
 	up_read(&vcpu->kvm->slots_lock);
 }
 
Index: kvm-slotslock/virt/kvm/kvm_main.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/kvm_main.c
+++ kvm-slotslock/virt/kvm/kvm_main.c
@@ -1007,7 +1007,9 @@ int kvm_write_guest_page(struct kvm *kvm
 	r = copy_to_user((void __user *)addr + offset, data, len);
 	if (r)
 		return -EFAULT;
+	spin_lock(&kvm->mmu_lock);
 	mark_page_dirty(kvm, gfn);
+	spin_unlock(&kvm->mmu_lock);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_page);



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 04/10] KVM: split kvm_arch_set_memory_region into prepare and commit
  2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
                   ` (2 preceding siblings ...)
  2009-09-21 23:37 ` [patch 03/10] KVM: switch dirty_log to mmu_lock protection Marcelo Tosatti
@ 2009-09-21 23:37 ` Marcelo Tosatti
  2009-09-22  6:40   ` Avi Kivity
  2009-09-21 23:37 ` [patch 05/10] KVM: introduce gfn_to_pfn_memslot Marcelo Tosatti
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-21 23:37 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: prepare-commit-memory-region --]
[-- Type: text/plain, Size: 6504 bytes --]

Required for SRCU convertion later.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm-slotslock.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
@@ -1576,15 +1576,14 @@ out:
 	return r;
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-		struct kvm_userspace_memory_region *mem,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+		struct kvm_memory_slot *memslot,
 		struct kvm_memory_slot old,
 		int user_alloc)
 {
 	unsigned long i;
 	unsigned long pfn;
-	int npages = mem->memory_size >> PAGE_SHIFT;
-	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+	int npages = memslot->npages;
 	unsigned long base_gfn = memslot->base_gfn;
 
 	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
@@ -1608,6 +1607,14 @@ int kvm_arch_set_memory_region(struct kv
 	return 0;
 }
 
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+		struct kvm_userspace_memory_region *mem,
+		struct kvm_memory_slot old,
+		int user_alloc)
+{
+	return;
+}
+
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
 	kvm_flush_remote_tlbs(kvm);
Index: kvm-slotslock/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm-slotslock.orig/arch/s390/kvm/kvm-s390.c
+++ kvm-slotslock/arch/s390/kvm/kvm-s390.c
@@ -680,10 +680,10 @@ long kvm_arch_vcpu_ioctl(struct file *fi
 }
 
 /* Section: memory related */
-int kvm_arch_set_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old,
-				int user_alloc)
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				   struct kvm_userspace_memory_region *mem,
+				   struct kvm_memory_slot old,
+				   int user_alloc)
 {
 	int i;
 	struct kvm_vcpu *vcpu;
@@ -710,14 +710,20 @@ int kvm_arch_set_memory_region(struct kv
 	if (!user_alloc)
 		return -EINVAL;
 
+	return 0;
+}
+
+int kvm_arch_commit_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old,
+				int user_alloc)
+{
 	/* request update of sie control block for all available vcpus */
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
 			continue;
 		kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
 	}
-
-	return 0;
 }
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
Index: kvm-slotslock/arch/x86/kvm/x86.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/x86.c
+++ kvm-slotslock/arch/x86/kvm/x86.c
@@ -4872,13 +4872,12 @@ void kvm_arch_destroy_vm(struct kvm *kvm
 	kfree(kvm);
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				struct kvm_memory_slot *memslot,
 				struct kvm_memory_slot old,
 				int user_alloc)
 {
-	int npages = mem->memory_size >> PAGE_SHIFT;
-	struct kvm_memory_slot *memslot = &kvm->memslots->memslots[mem->slot];
+	int npages = memslot->npages;
 
 	/*To keep backward compatibility with older userspace,
 	 *x86 needs to hanlde !user_alloc case.
@@ -4898,26 +4897,35 @@ int kvm_arch_set_memory_region(struct kv
 			if (IS_ERR((void *)userspace_addr))
 				return PTR_ERR((void *)userspace_addr);
 
-			/* set userspace_addr atomically for kvm_hva_to_rmapp */
-			spin_lock(&kvm->mmu_lock);
 			memslot->userspace_addr = userspace_addr;
-			spin_unlock(&kvm->mmu_lock);
-		} else {
-			if (!old.user_alloc && old.rmap) {
-				int ret;
-
-				down_write(&current->mm->mmap_sem);
-				ret = do_munmap(current->mm, old.userspace_addr,
-						old.npages * PAGE_SIZE);
-				up_write(&current->mm->mmap_sem);
-				if (ret < 0)
-					printk(KERN_WARNING
-				       "kvm_vm_ioctl_set_memory_region: "
-				       "failed to munmap memory\n");
-			}
 		}
 	}
 
+
+	return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old,
+				int user_alloc)
+{
+
+	int npages = mem->memory_size >> PAGE_SHIFT;
+
+	if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
+		int ret;
+
+		down_write(&current->mm->mmap_sem);
+		ret = do_munmap(current->mm, old.userspace_addr,
+				old.npages * PAGE_SIZE);
+		up_write(&current->mm->mmap_sem);
+		if (ret < 0)
+			printk(KERN_WARNING
+			       "kvm_vm_ioctl_set_memory_region: "
+			       "failed to munmap memory\n");
+	}
+
 	spin_lock(&kvm->mmu_lock);
 	if (!kvm->arch.n_requested_mmu_pages) {
 		unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
@@ -4926,8 +4934,6 @@ int kvm_arch_set_memory_region(struct kv
 
 	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	spin_unlock(&kvm->mmu_lock);
-
-	return 0;
 }
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
Index: kvm-slotslock/include/linux/kvm_host.h
===================================================================
--- kvm-slotslock.orig/include/linux/kvm_host.h
+++ kvm-slotslock/include/linux/kvm_host.h
@@ -254,7 +254,11 @@ int kvm_set_memory_region(struct kvm *kv
 int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc);
-int kvm_arch_set_memory_region(struct kvm *kvm,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				struct kvm_memory_slot *memslot,
+				struct kvm_memory_slot old,
+				int user_alloc);
+void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
 				struct kvm_memory_slot old,
 				int user_alloc);
Index: kvm-slotslock/virt/kvm/kvm_main.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/kvm_main.c
+++ kvm-slotslock/virt/kvm/kvm_main.c
@@ -644,6 +644,10 @@ skip_lpage:
 	if (!npages)
 		kvm_arch_flush_shadow(kvm);
 
+	r = kvm_arch_prepare_memory_region(kvm, &new, old, user_alloc);
+	if (r)
+		goto out_free;
+
 	spin_lock(&kvm->mmu_lock);
 	if (mem->slot >= kvm->memslots->nmemslots)
 		kvm->memslots->nmemslots = mem->slot + 1;
@@ -651,13 +655,7 @@ skip_lpage:
 	*memslot = new;
 	spin_unlock(&kvm->mmu_lock);
 
-	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
-	if (r) {
-		spin_lock(&kvm->mmu_lock);
-		*memslot = old;
-		spin_unlock(&kvm->mmu_lock);
-		goto out_free;
-	}
+	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
 
 	kvm_free_physmem_slot(&old, npages ? &new : NULL);
 	/* Slot deletion case: we have to update the current slot */



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 05/10] KVM: introduce gfn_to_pfn_memslot
  2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
                   ` (3 preceding siblings ...)
  2009-09-21 23:37 ` [patch 04/10] KVM: split kvm_arch_set_memory_region into prepare and commit Marcelo Tosatti
@ 2009-09-21 23:37 ` Marcelo Tosatti
  2009-09-21 23:37 ` [patch 06/10] KVM: use gfn_to_pfn_memslot in kvm_iommu_map_pages Marcelo Tosatti
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-21 23:37 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: introduce-gfn_to_pfn_memslot --]
[-- Type: text/plain, Size: 2192 bytes --]

Which takes a memslot pointer instead of using kvm->memslots.

To be used by SRCU convertion later.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-slotslock/include/linux/kvm_host.h
===================================================================
--- kvm-slotslock.orig/include/linux/kvm_host.h
+++ kvm-slotslock/include/linux/kvm_host.h
@@ -273,6 +273,8 @@ void kvm_set_page_dirty(struct page *pag
 void kvm_set_page_accessed(struct page *page);
 
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
+			 struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_pfn_dirty(pfn_t);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
Index: kvm-slotslock/virt/kvm/kvm_main.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/kvm_main.c
+++ kvm-slotslock/virt/kvm/kvm_main.c
@@ -816,21 +816,14 @@ unsigned long gfn_to_hva(struct kvm *kvm
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+static pfn_t __gfn_to_pfn(struct kvm *kvm, unsigned long addr)
 {
 	struct page *page[1];
-	unsigned long addr;
 	int npages;
 	pfn_t pfn;
 
 	might_sleep();
 
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr)) {
-		get_page(bad_page);
-		return page_to_pfn(bad_page);
-	}
-
 	npages = get_user_pages_fast(addr, 1, 1, page);
 
 	if (unlikely(npages != 1)) {
@@ -855,8 +848,32 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t 
 	return pfn;
 }
 
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+{
+	unsigned long addr;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr)) {
+		get_page(bad_page);
+		return page_to_pfn(bad_page);
+	}
+
+	return __gfn_to_pfn(kvm, addr);
+}
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
+static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+}
+
+pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
+			 struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
+	return __gfn_to_pfn(kvm, addr);
+}
+
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
 	pfn_t pfn;



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 06/10] KVM: use gfn_to_pfn_memslot in kvm_iommu_map_pages
  2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
                   ` (4 preceding siblings ...)
  2009-09-21 23:37 ` [patch 05/10] KVM: introduce gfn_to_pfn_memslot Marcelo Tosatti
@ 2009-09-21 23:37 ` Marcelo Tosatti
  2009-09-21 23:37 ` [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update Marcelo Tosatti
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-21 23:37 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: pass-memslot-to-kvm_iommu_map_pages --]
[-- Type: text/plain, Size: 2640 bytes --]

So its possible to iommu map a memslot before making it visible to 
kvm.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-slotslock/include/linux/kvm_host.h
===================================================================
--- kvm-slotslock.orig/include/linux/kvm_host.h
+++ kvm-slotslock/include/linux/kvm_host.h
@@ -439,8 +439,7 @@ void kvm_free_irq_source_id(struct kvm *
 #define KVM_IOMMU_CACHE_COHERENCY	0x1
 
 #ifdef CONFIG_IOMMU_API
-int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
-			unsigned long npages);
+int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 int kvm_iommu_map_guest(struct kvm *kvm);
 int kvm_iommu_unmap_guest(struct kvm *kvm);
 int kvm_assign_device(struct kvm *kvm,
Index: kvm-slotslock/virt/kvm/iommu.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/iommu.c
+++ kvm-slotslock/virt/kvm/iommu.c
@@ -32,10 +32,10 @@ static int kvm_iommu_unmap_memslots(stru
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);
 
-int kvm_iommu_map_pages(struct kvm *kvm,
-			gfn_t base_gfn, unsigned long npages)
+int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
-	gfn_t gfn = base_gfn;
+	gfn_t gfn = slot->base_gfn;
+	unsigned long npages = slot->npages;
 	pfn_t pfn;
 	int i, r = 0;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
@@ -54,7 +54,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
 			continue;
 
-		pfn = gfn_to_pfn(kvm, gfn);
+		pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
 		r = iommu_map_range(domain,
 				    gfn_to_gpa(gfn),
 				    pfn_to_hpa(pfn),
@@ -69,7 +69,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 	return 0;
 
 unmap_pages:
-	kvm_iommu_put_pages(kvm, base_gfn, i);
+	kvm_iommu_put_pages(kvm, slot->base_gfn, i);
 	return r;
 }
 
@@ -81,8 +81,7 @@ static int kvm_iommu_map_memslots(struct
 	slots = kvm->memslots;
 
 	for (i = 0; i < slots->nmemslots; i++) {
-		r = kvm_iommu_map_pages(kvm, slots->memslots[i].base_gfn,
-					slots->memslots[i].npages);
+		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
 		if (r)
 			break;
 	}
Index: kvm-slotslock/virt/kvm/kvm_main.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/kvm_main.c
+++ kvm-slotslock/virt/kvm/kvm_main.c
@@ -665,7 +665,7 @@ skip_lpage:
 	spin_unlock(&kvm->mmu_lock);
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
-	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	r = kvm_iommu_map_pages(kvm, memslot);
 	if (r)
 		goto out;
 #endif



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update
  2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
                   ` (5 preceding siblings ...)
  2009-09-21 23:37 ` [patch 06/10] KVM: use gfn_to_pfn_memslot in kvm_iommu_map_pages Marcelo Tosatti
@ 2009-09-21 23:37 ` Marcelo Tosatti
  2009-09-22  6:59   ` Avi Kivity
                     ` (2 more replies)
  2009-09-21 23:37 ` [patch 08/10] KVM: x86: switch kvm_set_memory_alias " Marcelo Tosatti
                   ` (3 subsequent siblings)
  10 siblings, 3 replies; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-21 23:37 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: introduce-srcu-and-use-for-slots --]
[-- Type: text/plain, Size: 13379 bytes --]

Use two steps for memslot deletion: mark the slot invalid (which stops 
instantiation of new shadow pages for that slot, but allows destruction),
then instantiate the new empty slot.

Also simplifies kvm_handle_hva locking.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-slotslock/include/linux/kvm.h
===================================================================
--- kvm-slotslock.orig/include/linux/kvm.h
+++ kvm-slotslock/include/linux/kvm.h
@@ -39,7 +39,7 @@ struct kvm_userspace_memory_region {
 
 /* for kvm_memory_region::flags */
 #define KVM_MEM_LOG_DIRTY_PAGES  1UL
-
+#define KVM_MEMSLOT_INVALID      (1UL << 1)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
Index: kvm-slotslock/virt/kvm/kvm_main.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/kvm_main.c
+++ kvm-slotslock/virt/kvm/kvm_main.c
@@ -43,6 +43,7 @@
 #include <linux/swap.h>
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
+#include <linux/srcu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -352,11 +353,15 @@ static struct kvm *kvm_create_vm(void)
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
 		goto out_err;
+	if (init_srcu_struct(&kvm->srcu))
+		goto out_err;
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!page)
+	if (!page) {
+		cleanup_srcu_struct(&kvm->srcu);
 		goto out_err;
+	}
 
 	kvm->coalesced_mmio_ring =
 			(struct kvm_coalesced_mmio_ring *)page_address(page);
@@ -367,6 +372,7 @@ static struct kvm *kvm_create_vm(void)
 		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
 		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
 		if (r) {
+			cleanup_srcu_struct(&kvm->srcu);
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 			put_page(page);
 #endif
@@ -462,6 +468,7 @@ static void kvm_destroy_vm(struct kvm *k
 	kvm_arch_flush_shadow(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
+	cleanup_srcu_struct(&kvm->srcu);
 	hardware_disable_all();
 	mmdrop(mm);
 }
@@ -502,12 +509,13 @@ int __kvm_set_memory_region(struct kvm *
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc)
 {
-	int r;
+	int r, flush_shadow = 0;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
+	struct kvm_memslots *slots, *old_memslots;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -569,15 +577,7 @@ int __kvm_set_memory_region(struct kvm *
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
 		new.user_alloc = user_alloc;
-		/*
-		 * hva_to_rmmap() serialzies with the mmu_lock and to be
-		 * safe it has to ignore memslots with !user_alloc &&
-		 * !userspace_addr.
-		 */
-		if (user_alloc)
-			new.userspace_addr = mem->userspace_addr;
-		else
-			new.userspace_addr = 0;
+		new.userspace_addr = mem->userspace_addr;
 	}
 	if (!npages)
 		goto skip_lpage;
@@ -632,8 +632,9 @@ skip_lpage:
 		if (!new.dirty_bitmap)
 			goto out_free;
 		memset(new.dirty_bitmap, 0, dirty_bytes);
+		/* destroy any largepage mappings for dirty tracking */
 		if (old.npages)
-			kvm_arch_flush_shadow(kvm);
+			flush_shadow = 1;
 	}
 #else  /* not defined CONFIG_S390 */
 	new.user_alloc = user_alloc;
@@ -641,34 +642,69 @@ skip_lpage:
 		new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 
-	if (!npages)
+	if (!npages) {
+		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+		if (!slots)
+			goto out_free;
+		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+		if (mem->slot >= slots->nmemslots)
+			slots->nmemslots = mem->slot + 1;
+		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+
+		old_memslots = kvm->memslots;
+		rcu_assign_pointer(kvm->memslots, slots);
+		synchronize_srcu(&kvm->srcu);
+		/* From this point no new shadow pages pointing to a deleted
+		 * memslot will be created.
+         	 *
+         	 * validation of sp->gfn happens in:
+         	 * 	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+         	 * 	- kvm_is_visible_gfn (mmu_check_roots)
+         	 */
 		kvm_arch_flush_shadow(kvm);
+		kfree(old_memslots);
+	}
 
 	r = kvm_arch_prepare_memory_region(kvm, &new, old, user_alloc);
 	if (r)
 		goto out_free;
 
-	spin_lock(&kvm->mmu_lock);
-	if (mem->slot >= kvm->memslots->nmemslots)
-		kvm->memslots->nmemslots = mem->slot + 1;
+#ifdef CONFIG_DMAR
+	/* map the pages in iommu page table */
+	if (npages)
+		r = kvm_iommu_map_pages(kvm, &new);
+		if (r)
+			goto out_free;
+#endif
 
-	*memslot = new;
-	spin_unlock(&kvm->mmu_lock);
+	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+	if (mem->slot >= slots->nmemslots)
+		slots->nmemslots = mem->slot + 1;
+
+	/* actual memory is freed via old in kvm_free_physmem_slot below */
+	if (!npages) {
+		new.rmap = NULL;
+		new.dirty_bitmap = NULL;
+		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
+			new.lpage_info[i] = NULL;
+	}
+
+	slots->memslots[mem->slot] = new;
+	old_memslots = kvm->memslots;
+	rcu_assign_pointer(kvm->memslots, slots);
+	synchronize_srcu(&kvm->srcu);
 
 	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
 
-	kvm_free_physmem_slot(&old, npages ? &new : NULL);
-	/* Slot deletion case: we have to update the current slot */
-	spin_lock(&kvm->mmu_lock);
-	if (!npages)
-		*memslot = old;
-	spin_unlock(&kvm->mmu_lock);
-#ifdef CONFIG_DMAR
-	/* map the pages in iommu page table */
-	r = kvm_iommu_map_pages(kvm, memslot);
-	if (r)
-		goto out;
-#endif
+	kvm_free_physmem_slot(&old, &new);
+	kfree(old_memslots);
+
+	if (flush_shadow)
+		kvm_arch_flush_shadow(kvm);
+
 	return 0;
 
 out_free:
@@ -768,7 +804,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -790,12 +826,15 @@ struct kvm_memory_slot *gfn_to_memslot(s
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
 	gfn = unalias_gfn(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
+		if (memslot->flags & KVM_MEMSLOT_INVALID)
+			continue;
+
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
 			return 1;
@@ -810,7 +849,7 @@ unsigned long gfn_to_hva(struct kvm *kvm
 
 	gfn = unalias_gfn(kvm, gfn);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
-	if (!slot)
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
Index: kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm-slotslock.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
@@ -1834,6 +1834,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
 	struct kvm_memory_slot *memslot;
 	int is_dirty = 0;
 
+	down_write(&kvm->slots_lock);
 	spin_lock(&kvm->arch.dirty_log_lock);
 
 	r = kvm_ia64_sync_dirty_log(kvm, log);
@@ -1853,6 +1854,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
 	}
 	r = 0;
 out:
+	up_write(&kvm->slots_lock);
 	spin_unlock(&kvm->arch.dirty_log_lock);
 	return r;
 }
Index: kvm-slotslock/arch/x86/kvm/mmu.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/mmu.c
+++ kvm-slotslock/arch/x86/kvm/mmu.c
@@ -29,6 +29,7 @@
 #include <linux/swap.h>
 #include <linux/hugetlb.h>
 #include <linux/compiler.h>
+#include <linux/srcu.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -766,23 +767,18 @@ static int kvm_unmap_rmapp(struct kvm *k
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
 {
-	int i, j;
+	int i, j, idx;
 	int retval = 0;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = rcu_dereference(kvm->memslots);
 
-	/*
-	 * If mmap_sem isn't taken, we can look the memslots with only
-	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
-	 */
 	for (i = 0; i < slots->nmemslots; i++) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 		unsigned long start = memslot->userspace_addr;
 		unsigned long end;
 
-		/* mmu_lock protects userspace_addr */
-		if (!start)
-			continue;
-
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
@@ -797,6 +793,7 @@ static int kvm_handle_hva(struct kvm *kv
 			}
 		}
 	}
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	return retval;
 }
@@ -2966,16 +2963,20 @@ nomem:
  */
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 {
-	int i;
+	int i, idx;
 	unsigned int nr_mmu_pages;
 	unsigned int  nr_pages = 0;
+	struct kvm_memslots *slots;
 
-	for (i = 0; i < kvm->memslots->nmemslots; i++)
-		nr_pages += kvm->memslots->memslots[i].npages;
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = rcu_dereference(kvm->memslots);
+	for (i = 0; i < slots->nmemslots; i++)
+		nr_pages += slots->memslots[i].npages;
 
 	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
 	nr_mmu_pages = max(nr_mmu_pages,
 			(unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	return nr_mmu_pages;
 }
@@ -3241,10 +3242,12 @@ static void audit_mappings(struct kvm_vc
 static int count_rmaps(struct kvm_vcpu *vcpu)
 {
 	int nmaps = 0;
-	int i, j, k;
+	int i, j, k, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = rcu_dereference(kvm->memslots);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i];
+		struct kvm_memory_slot *m = &slots->memslots[i];
 		struct kvm_rmap_desc *d;
 
 		for (j = 0; j < m->npages; ++j) {
@@ -3267,6 +3270,7 @@ static int count_rmaps(struct kvm_vcpu *
 			}
 		}
 	}
+	srcu_read_unlock(&kvm->srcu, idx);
 	return nmaps;
 }
 
Index: kvm-slotslock/arch/x86/kvm/vmx.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/vmx.c
+++ kvm-slotslock/arch/x86/kvm/vmx.c
@@ -24,6 +24,7 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/sched.h>
+#include <linux/srcu.h>
 #include <linux/moduleparam.h>
 #include <linux/ftrace_event.h>
 #include "kvm_cache_regs.h"
@@ -1465,10 +1466,18 @@ static void enter_pmode(struct kvm_vcpu 
 static gva_t rmode_tss_base(struct kvm *kvm)
 {
 	if (!kvm->arch.tss_addr) {
-		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
-				 kvm->memslots->memslots[0].npages - 3;
+		struct kvm_memslots *slots;
+		gfn_t base_gfn;
+		int idx;
+
+		idx = srcu_read_lock(&kvm->srcu);
+		slots = rcu_dereference(kvm->memslots);
+ 		base_gfn = slots->memslots[0].base_gfn +
+				 slots->memslots[0].npages - 3;
+		srcu_read_unlock(&kvm->srcu, idx);
 		return base_gfn << PAGE_SHIFT;
 	}
+
 	return kvm->arch.tss_addr;
 }
 
Index: kvm-slotslock/include/linux/kvm_host.h
===================================================================
--- kvm-slotslock.orig/include/linux/kvm_host.h
+++ kvm-slotslock/include/linux/kvm_host.h
@@ -163,6 +163,7 @@ struct kvm {
 	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
+	struct srcu_struct srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 	struct kvm_vcpu *bsp_vcpu;
Index: kvm-slotslock/virt/kvm/assigned-dev.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/assigned-dev.c
+++ kvm-slotslock/virt/kvm/assigned-dev.c
@@ -504,11 +504,11 @@ out:
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      struct kvm_assigned_pci_dev *assigned_dev)
 {
-	int r = 0;
+	int r = 0, idx;
 	struct kvm_assigned_dev_kernel *match;
 	struct pci_dev *dev;
 
-	down_read(&kvm->slots_lock);
+	idx = srcu_read_lock(&kvm->srcu);
 	mutex_lock(&kvm->lock);
 
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
@@ -574,7 +574,7 @@ static int kvm_vm_ioctl_assign_device(st
 
 out:
 	mutex_unlock(&kvm->lock);
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	return r;
 out_list_del:
 	list_del(&match->list);
@@ -586,7 +586,7 @@ out_put:
 out_free:
 	kfree(match);
 	mutex_unlock(&kvm->lock);
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	return r;
 }
 
Index: kvm-slotslock/virt/kvm/iommu.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/iommu.c
+++ kvm-slotslock/virt/kvm/iommu.c
@@ -78,7 +78,7 @@ static int kvm_iommu_map_memslots(struct
 	int i, r = 0;
 	struct kvm_memslots *slots;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; i++) {
 		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
@@ -214,7 +214,7 @@ static int kvm_iommu_unmap_memslots(stru
 	int i;
 	struct kvm_memslots *slots;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; i++) {
 		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 08/10] KVM: x86: switch kvm_set_memory_alias to SRCU update
  2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
                   ` (6 preceding siblings ...)
  2009-09-21 23:37 ` [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update Marcelo Tosatti
@ 2009-09-21 23:37 ` Marcelo Tosatti
  2009-09-22  7:04   ` Avi Kivity
  2009-09-21 23:37 ` [patch 09/10] KVM: convert io_bus to SRCU Marcelo Tosatti
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-21 23:37 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: srcu-alias-update --]
[-- Type: text/plain, Size: 8783 bytes --]

Using a similar two-step procedure as for memslots.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-slotslock/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm-slotslock.orig/arch/x86/include/asm/kvm_host.h
+++ kvm-slotslock/arch/x86/include/asm/kvm_host.h
@@ -373,10 +373,13 @@ struct kvm_vcpu_arch {
 	u64 *mce_banks;
 };
 
+#define KVM_ALIAS_INVALID     1UL
+
 struct kvm_mem_alias {
 	gfn_t base_gfn;
 	unsigned long npages;
 	gfn_t target_gfn;
+	unsigned long flags;
 };
 
 struct kvm_mem_aliases {
Index: kvm-slotslock/arch/x86/kvm/mmu.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/mmu.c
+++ kvm-slotslock/arch/x86/kvm/mmu.c
@@ -424,7 +424,7 @@ static void account_shadowed(struct kvm 
 	int *write_count;
 	int i;
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn(kvm, gfn, false);
 
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
 	for (i = PT_DIRECTORY_LEVEL;
@@ -440,7 +440,7 @@ static void unaccount_shadowed(struct kv
 	int *write_count;
 	int i;
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn(kvm, gfn, false);
 	for (i = PT_DIRECTORY_LEVEL;
 	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
 		slot          = gfn_to_memslot_unaliased(kvm, gfn);
@@ -457,7 +457,7 @@ static int has_wrprotected_page(struct k
 	struct kvm_memory_slot *slot;
 	int *largepage_idx;
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn(kvm, gfn, false);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (slot) {
 		largepage_idx = slot_largepage_idx(gfn, slot, level);
@@ -565,7 +565,7 @@ static int rmap_add(struct kvm_vcpu *vcp
 
 	if (!is_rmap_spte(*spte))
 		return count;
-	gfn = unalias_gfn(vcpu->kvm, gfn);
+	gfn = unalias_gfn(vcpu->kvm, gfn, false);
 	sp = page_header(__pa(spte));
 	sp->gfns[spte - sp->spt] = gfn;
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
@@ -703,7 +703,7 @@ static int rmap_write_protect(struct kvm
 	u64 *spte;
 	int i, write_protected = 0;
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn(kvm, gfn, false);
 	rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL);
 
 	spte = rmap_next(kvm, rmapp, NULL);
@@ -836,7 +836,7 @@ static void rmap_recycle(struct kvm_vcpu
 
 	sp = page_header(__pa(spte));
 
-	gfn = unalias_gfn(vcpu->kvm, gfn);
+	gfn = unalias_gfn(vcpu->kvm, gfn, false);
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
 	kvm_unmap_rmapp(vcpu->kvm, rmapp);
@@ -3358,7 +3358,7 @@ static void audit_write_protection(struc
 		if (sp->unsync)
 			continue;
 
-		gfn = unalias_gfn(vcpu->kvm, sp->gfn);
+		gfn = unalias_gfn(vcpu->kvm, sp->gfn, false);
 		slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
 		rmapp = &slot->rmap[gfn - slot->base_gfn];
 
Index: kvm-slotslock/arch/x86/kvm/x86.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/x86.c
+++ kvm-slotslock/arch/x86/kvm/x86.c
@@ -37,6 +37,7 @@
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 #include <linux/cpufreq.h>
+#include <linux/srcu.h>
 #include <trace/events/kvm.h>
 #undef TRACE_INCLUDE_FILE
 #define CREATE_TRACE_POINTS
@@ -1957,14 +1958,18 @@ static int kvm_vm_ioctl_get_nr_mmu_pages
 	return kvm->arch.n_alloc_mmu_pages;
 }
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn, bool instantiation)
 {
 	int i;
 	struct kvm_mem_alias *alias;
-	struct kvm_mem_aliases *aliases = kvm->arch.aliases;
+	struct kvm_mem_aliases *aliases;
+
+	aliases = rcu_dereference(kvm->arch.aliases);
 
 	for (i = 0; i < aliases->naliases; ++i) {
 		alias = &aliases->aliases[i];
+		if (instantiation && (alias->flags & KVM_ALIAS_INVALID))
+			continue;
 		if (gfn >= alias->base_gfn
 		    && gfn < alias->base_gfn + alias->npages)
 			return alias->target_gfn + gfn - alias->base_gfn;
@@ -1982,7 +1987,7 @@ static int kvm_vm_ioctl_set_memory_alias
 {
 	int r, n;
 	struct kvm_mem_alias *p;
-	struct kvm_mem_aliases *aliases;
+	struct kvm_mem_aliases *aliases, *old_aliases;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -1999,23 +2004,43 @@ static int kvm_vm_ioctl_set_memory_alias
 	    < alias->target_phys_addr)
 		goto out;
 
+	r = -ENOMEM;
+	aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
+	if (!aliases)
+		goto out;
+
 	down_write(&kvm->slots_lock);
-	spin_lock(&kvm->mmu_lock);
 
-	aliases = kvm->arch.aliases;
+	/* invalidate any gfn reference in case of deletion/shrinking */
+	memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
+	aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
+	old_aliases = kvm->arch.aliases;
+	rcu_assign_pointer(kvm->arch.aliases, aliases);
+	synchronize_srcu(&kvm->srcu);
+	kvm_mmu_zap_all(kvm);
+	kfree(old_aliases);
+
+	r = -ENOMEM;
+	aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
+	if (!aliases)
+		goto out;
+	memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
 
 	p = &aliases->aliases[alias->slot];
 	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
 	p->npages = alias->memory_size >> PAGE_SHIFT;
 	p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
+	p->flags &= ~(KVM_ALIAS_INVALID);
 
 	for (n = KVM_ALIAS_SLOTS; n > 0; --n)
 		if (aliases->aliases[n - 1].npages)
 			break;
 	aliases->naliases = n;
 
-	spin_unlock(&kvm->mmu_lock);
-	kvm_mmu_zap_all(kvm);
+	old_aliases = kvm->arch.aliases;
+	rcu_assign_pointer(kvm->arch.aliases, aliases);
+	synchronize_srcu(&kvm->srcu);
+	kfree(old_aliases);
 
 	up_write(&kvm->slots_lock);
 
Index: kvm-slotslock/include/linux/kvm_host.h
===================================================================
--- kvm-slotslock.orig/include/linux/kvm_host.h
+++ kvm-slotslock/include/linux/kvm_host.h
@@ -265,7 +265,7 @@ void kvm_arch_commit_memory_region(struc
 				int user_alloc);
 void kvm_disable_largepages(void);
 void kvm_arch_flush_shadow(struct kvm *kvm);
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn, bool instantiation);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
Index: kvm-slotslock/virt/kvm/kvm_main.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/kvm_main.c
+++ kvm-slotslock/virt/kvm/kvm_main.c
@@ -819,7 +819,7 @@ EXPORT_SYMBOL_GPL(gfn_to_memslot_unalias
 
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn(kvm, gfn, false);
 	return gfn_to_memslot_unaliased(kvm, gfn);
 }
 
@@ -828,7 +828,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, 
 	int i;
 	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn(kvm, gfn, true);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
@@ -847,7 +847,7 @@ unsigned long gfn_to_hva(struct kvm *kvm
 {
 	struct kvm_memory_slot *slot;
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn(kvm, gfn, true);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
@@ -1117,7 +1117,7 @@ void mark_page_dirty(struct kvm *kvm, gf
 {
 	struct kvm_memory_slot *memslot;
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn(kvm, gfn, false);
 	memslot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
Index: kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm-slotslock.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
@@ -1946,7 +1946,7 @@ int kvm_cpu_has_pending_timer(struct kvm
 	return vcpu->arch.timer_fired;
 }
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn, bool instantiation)
 {
 	return gfn;
 }
Index: kvm-slotslock/arch/powerpc/kvm/powerpc.c
===================================================================
--- kvm-slotslock.orig/arch/powerpc/kvm/powerpc.c
+++ kvm-slotslock/arch/powerpc/kvm/powerpc.c
@@ -34,7 +34,7 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn, bool instantiation)
 {
 	return gfn;
 }
Index: kvm-slotslock/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm-slotslock.orig/arch/s390/kvm/kvm-s390.c
+++ kvm-slotslock/arch/s390/kvm/kvm-s390.c
@@ -730,7 +730,7 @@ void kvm_arch_flush_shadow(struct kvm *k
 {
 }
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn, bool instantiation)
 {
 	return gfn;
 }



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 09/10] KVM: convert io_bus to SRCU
  2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
                   ` (7 preceding siblings ...)
  2009-09-21 23:37 ` [patch 08/10] KVM: x86: switch kvm_set_memory_alias " Marcelo Tosatti
@ 2009-09-21 23:37 ` Marcelo Tosatti
  2009-09-21 23:37 ` [patch 10/10] KVM: switch vcpu context to use SRCU Marcelo Tosatti
  2009-09-22  7:09 ` [patch 00/10] RFC: " Avi Kivity
  10 siblings, 0 replies; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-21 23:37 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: srcu-iobus-update --]
[-- Type: text/plain, Size: 14452 bytes --]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-slotslock/arch/x86/kvm/i8254.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/i8254.c
+++ kvm-slotslock/arch/x86/kvm/i8254.c
@@ -640,13 +640,13 @@ struct kvm_pit *kvm_create_pit(struct kv
 	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
 
 	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
-	ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
 	if (ret < 0)
 		goto fail;
 
 	if (flags & KVM_PIT_SPEAKER_DUMMY) {
 		kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
-		ret = __kvm_io_bus_register_dev(&kvm->pio_bus,
+		ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
 						&pit->speaker_dev);
 		if (ret < 0)
 			goto fail_unregister;
@@ -655,7 +655,7 @@ struct kvm_pit *kvm_create_pit(struct kv
 	return pit;
 
 fail_unregister:
-	__kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev);
+	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
 
 fail:
 	if (pit->irq_source_id >= 0)
Index: kvm-slotslock/arch/x86/kvm/i8259.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/i8259.c
+++ kvm-slotslock/arch/x86/kvm/i8259.c
@@ -533,7 +533,9 @@ struct kvm_pic *kvm_create_pic(struct kv
 	 * Initialize PIO device
 	 */
 	kvm_iodevice_init(&s->dev, &picdev_ops);
-	ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
+	down_write(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
+	up_write(&kvm->slots_lock);
 	if (ret < 0) {
 		kfree(s);
 		return NULL;
Index: kvm-slotslock/include/linux/kvm_host.h
===================================================================
--- kvm-slotslock.orig/include/linux/kvm_host.h
+++ kvm-slotslock/include/linux/kvm_host.h
@@ -58,20 +58,20 @@ struct kvm_io_bus {
 	struct kvm_io_device *devs[NR_IOBUS_DEVS];
 };
 
-void kvm_io_bus_init(struct kvm_io_bus *bus);
-void kvm_io_bus_destroy(struct kvm_io_bus *bus);
-int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, int len,
-		     const void *val);
-int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len,
+enum kvm_bus {
+	KVM_MMIO_BUS,
+	KVM_PIO_BUS,
+	KVM_NR_BUSES
+};
+
+int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+		     int len, const void *val);
+int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
 		    void *val);
-int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			       struct kvm_io_device *dev);
-int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 			    struct kvm_io_device *dev);
-void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
-				 struct kvm_io_device *dev);
-void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus,
-			       struct kvm_io_device *dev);
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			      struct kvm_io_device *dev);
 
 struct kvm_vcpu {
 	struct kvm *kvm;
@@ -172,8 +172,7 @@ struct kvm {
 	atomic_t online_vcpus;
 	struct list_head vm_list;
 	struct mutex lock;
-	struct kvm_io_bus mmio_bus;
-	struct kvm_io_bus pio_bus;
+	struct kvm_io_bus *buses[KVM_NR_BUSES];
 #ifdef CONFIG_HAVE_KVM_EVENTFD
 	struct {
 		spinlock_t        lock;
Index: kvm-slotslock/virt/kvm/eventfd.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/eventfd.c
+++ kvm-slotslock/virt/kvm/eventfd.c
@@ -451,7 +451,7 @@ static int
 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
 	struct _ioeventfd        *p;
 	struct eventfd_ctx       *eventfd;
 	int                       ret;
@@ -506,7 +506,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, st
 
 	kvm_iodevice_init(&p->dev, &ioeventfd_ops);
 
-	ret = __kvm_io_bus_register_dev(bus, &p->dev);
+	ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
 	if (ret < 0)
 		goto unlock_fail;
 
@@ -530,7 +530,7 @@ static int
 kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
 	struct _ioeventfd        *p, *tmp;
 	struct eventfd_ctx       *eventfd;
 	int                       ret = -ENOENT;
@@ -553,7 +553,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, 
 		if (!p->wildcard && p->datamatch != args->datamatch)
 			continue;
 
-		__kvm_io_bus_unregister_dev(bus, &p->dev);
+		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
 		ioeventfd_release(p);
 		ret = 0;
 		break;
Index: kvm-slotslock/virt/kvm/ioapic.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/ioapic.c
+++ kvm-slotslock/virt/kvm/ioapic.c
@@ -372,7 +372,9 @@ int kvm_ioapic_init(struct kvm *kvm)
 	kvm_ioapic_reset(ioapic);
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
 	ioapic->kvm = kvm;
-	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev);
+	down_write(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+	up_write(&kvm->slots_lock);
 	if (ret < 0)
 		kfree(ioapic);
 
Index: kvm-slotslock/virt/kvm/kvm_main.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/kvm_main.c
+++ kvm-slotslock/virt/kvm/kvm_main.c
@@ -85,6 +85,8 @@ static long kvm_vcpu_ioctl(struct file *
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
 
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+
 static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
@@ -331,7 +333,7 @@ static const struct mmu_notifier_ops kvm
 
 static struct kvm *kvm_create_vm(void)
 {
-	int r = 0;
+	int r = 0, i;
 	struct kvm *kvm = kvm_arch_create_vm();
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	struct page *page;
@@ -355,6 +357,14 @@ static struct kvm *kvm_create_vm(void)
 		goto out_err;
 	if (init_srcu_struct(&kvm->srcu))
 		goto out_err;
+ 	for (i = 0; i < KVM_NR_BUSES; i++) {
+ 		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
+ 					GFP_KERNEL);
+ 		if (!kvm->buses[i]) {
+ 			cleanup_srcu_struct(&kvm->srcu);
+ 			goto out_err;
+ 		}
+ 	}
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -385,11 +395,9 @@ static struct kvm *kvm_create_vm(void)
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
 	spin_lock_init(&kvm->requests_lock);
-	kvm_io_bus_init(&kvm->pio_bus);
 	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
-	kvm_io_bus_init(&kvm->mmio_bus);
 	init_rwsem(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
 	spin_lock(&kvm_lock);
@@ -404,6 +412,8 @@ out:
 out_err:
 	hardware_disable_all();
 out_err_nodisable:
+ 	for (i = 0; i < KVM_NR_BUSES; i++)
+ 		kfree(kvm->buses[i]);
 	kfree(kvm->memslots);
 	kfree(kvm);
 	return ERR_PTR(r);
@@ -449,6 +459,7 @@ void kvm_free_physmem(struct kvm *kvm)
 
 static void kvm_destroy_vm(struct kvm *kvm)
 {
+	int i;
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
@@ -456,8 +467,8 @@ static void kvm_destroy_vm(struct kvm *k
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
-	kvm_io_bus_destroy(&kvm->pio_bus);
-	kvm_io_bus_destroy(&kvm->mmio_bus);
+	for (i = 0; i < KVM_NR_BUSES; i++)
+		kvm_io_bus_destroy(kvm->buses[i]);
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	if (kvm->coalesced_mmio_ring != NULL)
 		free_page((unsigned long)kvm->coalesced_mmio_ring);
@@ -1838,12 +1849,7 @@ static struct notifier_block kvm_reboot_
 	.priority = 0,
 };
 
-void kvm_io_bus_init(struct kvm_io_bus *bus)
-{
-	memset(bus, 0, sizeof(*bus));
-}
-
-void kvm_io_bus_destroy(struct kvm_io_bus *bus)
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 {
 	int i;
 
@@ -1852,13 +1858,15 @@ void kvm_io_bus_destroy(struct kvm_io_bu
 
 		kvm_iodevice_destructor(pos);
 	}
+	kfree(bus);
 }
 
 /* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
+int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val)
 {
 	int i;
+	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
 			return 0;
@@ -1866,59 +1874,71 @@ int kvm_io_bus_write(struct kvm_io_bus *
 }
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
+int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+		    int len, void *val)
 {
 	int i;
+	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
 			return 0;
 	return -EOPNOTSUPP;
 }
 
-int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
-			     struct kvm_io_device *dev)
+/* Caller must have write lock on slots_lock. */
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			    struct kvm_io_device *dev)
 {
-	int ret;
+	struct kvm_io_bus *new_bus, *bus;
 
-	down_write(&kvm->slots_lock);
-	ret = __kvm_io_bus_register_dev(bus, dev);
-	up_write(&kvm->slots_lock);
-
-	return ret;
-}
-
-/* An unlocked version. Caller must have write lock on slots_lock. */
-int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			      struct kvm_io_device *dev)
-{
+	bus = kvm->buses[bus_idx];
 	if (bus->dev_count > NR_IOBUS_DEVS-1)
 		return -ENOSPC;
 
-	bus->devs[bus->dev_count++] = dev;
+	new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+	if (!new_bus)
+		return -ENOMEM;
+	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+	new_bus->devs[new_bus->dev_count++] = dev;
+	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+	synchronize_srcu(&kvm->srcu);
+	kfree(bus);
 
 	return 0;
 }
 
-void kvm_io_bus_unregister_dev(struct kvm *kvm,
-			       struct kvm_io_bus *bus,
-			       struct kvm_io_device *dev)
+/* Caller must have write lock on slots_lock. */
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			      struct kvm_io_device *dev)
 {
-	down_write(&kvm->slots_lock);
-	__kvm_io_bus_unregister_dev(bus, dev);
-	up_write(&kvm->slots_lock);
-}
+	int i, r;
+	struct kvm_io_bus *new_bus, *bus;
 
-/* An unlocked version. Caller must have write lock on slots_lock. */
-void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
-				 struct kvm_io_device *dev)
-{
-	int i;
+	new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+	if (!new_bus)
+		return -ENOMEM;
 
-	for (i = 0; i < bus->dev_count; i++)
-		if (bus->devs[i] == dev) {
-			bus->devs[i] = bus->devs[--bus->dev_count];
+	bus = kvm->buses[bus_idx];
+	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+
+	r = -ENOENT;
+	for (i = 0; i < new_bus->dev_count; i++)
+		if (new_bus->devs[i] == dev) {
+			r = 0;
+			new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
 			break;
 		}
+
+	if (r) {
+		kfree(new_bus);
+		return r;
+	}
+
+	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+	synchronize_srcu(&kvm->srcu);
+	kfree(bus);
+	return r;
 }
 
 static struct notifier_block kvm_cpu_notifier = {
Index: kvm-slotslock/virt/kvm/coalesced_mmio.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/coalesced_mmio.c
+++ kvm-slotslock/virt/kvm/coalesced_mmio.c
@@ -102,7 +102,9 @@ int kvm_coalesced_mmio_init(struct kvm *
 	dev->kvm = kvm;
 	kvm->coalesced_mmio_dev = dev;
 
-	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev);
+	down_write(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+	up_write(&kvm->slots_lock);
 	if (ret < 0)
 		kfree(dev);
 
Index: kvm-slotslock/arch/x86/kvm/x86.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/x86.c
+++ kvm-slotslock/arch/x86/kvm/x86.c
@@ -2489,7 +2489,7 @@ static int vcpu_mmio_write(struct kvm_vc
 	    !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
 		return 0;
 
-	return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v);
+	return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
 }
 
 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
@@ -2498,7 +2498,7 @@ static int vcpu_mmio_read(struct kvm_vcp
 	    !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
 		return 0;
 
-	return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v);
+	return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
 }
 
 static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
@@ -2983,11 +2983,12 @@ static int kernel_pio(struct kvm_vcpu *v
 	int r;
 
 	if (vcpu->arch.pio.in)
-		r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
+		r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
 				    vcpu->arch.pio.size, pd);
 	else
-		r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
-				     vcpu->arch.pio.size, pd);
+		r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
+				     vcpu->arch.pio.port, vcpu->arch.pio.size,
+				     pd);
 	return r;
 }
 
@@ -2998,7 +2999,7 @@ static int pio_string_write(struct kvm_v
 	int i, r = 0;
 
 	for (i = 0; i < io->cur_count; i++) {
-		if (kvm_io_bus_write(&vcpu->kvm->pio_bus,
+		if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
 				     io->port, io->size, pd)) {
 			r = -EOPNOTSUPP;
 			break;
Index: kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm-slotslock.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
@@ -241,10 +241,10 @@ static int handle_mmio(struct kvm_vcpu *
 	return 0;
 mmio:
 	if (p->dir)
-		r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr,
+		r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
 				    p->size, &p->data);
 	else
-		r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr,
+		r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
 				     p->size, &p->data);
 	if (r)
 		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);



^ permalink raw reply	[flat|nested] 26+ messages in thread

* [patch 10/10] KVM: switch vcpu context to use SRCU
  2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
                   ` (8 preceding siblings ...)
  2009-09-21 23:37 ` [patch 09/10] KVM: convert io_bus to SRCU Marcelo Tosatti
@ 2009-09-21 23:37 ` Marcelo Tosatti
  2009-09-22  7:07   ` Avi Kivity
  2009-09-22  7:09 ` [patch 00/10] RFC: " Avi Kivity
  10 siblings, 1 reply; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-21 23:37 UTC (permalink / raw)
  To: kvm; +Cc: avi, Marcelo Tosatti

[-- Attachment #1: switch-to-memslot-srcu --]
[-- Type: text/plain, Size: 7649 bytes --]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm-slotslock/arch/x86/kvm/x86.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/x86.c
+++ kvm-slotslock/arch/x86/kvm/x86.c
@@ -1140,15 +1140,15 @@ static int __msr_io(struct kvm_vcpu *vcp
 		    int (*do_msr)(struct kvm_vcpu *vcpu,
 				  unsigned index, u64 *data))
 {
-	int i;
+	int i, idx;
 
 	vcpu_load(vcpu);
 
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	for (i = 0; i < msrs->nmsrs; ++i)
 		if (do_msr(vcpu, entries[i].index, &entries[i].data))
 			break;
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
 	vcpu_put(vcpu);
 
@@ -3531,16 +3531,17 @@ static void vapic_enter(struct kvm_vcpu 
 static void vapic_exit(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
+	int idx;
 
 	if (!apic || !apic->vapic_addr)
 		return;
 
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	kvm_release_page_dirty(apic->vapic_page);
 	spin_lock(&vcpu->kvm->mmu_lock);
 	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -3669,7 +3670,7 @@ static int vcpu_enter_guest(struct kvm_v
 		kvm_lapic_sync_to_vapic(vcpu);
 	}
 
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
 	kvm_guest_enter();
 
@@ -3710,7 +3711,7 @@ static int vcpu_enter_guest(struct kvm_v
 
 	preempt_enable();
 
-	down_read(&vcpu->kvm->slots_lock);
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 	/*
 	 * Profile KVM exit RIPs:
@@ -3732,6 +3733,7 @@ out:
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int r;
+	struct kvm *kvm = vcpu->kvm;
 
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
 		pr_debug("vcpu %d received sipi with vector # %x\n",
@@ -3743,7 +3745,7 @@ static int __vcpu_run(struct kvm_vcpu *v
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	}
 
-	down_read(&vcpu->kvm->slots_lock);
+	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 	vapic_enter(vcpu);
 
 	r = 1;
@@ -3751,9 +3753,9 @@ static int __vcpu_run(struct kvm_vcpu *v
 		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
 			r = vcpu_enter_guest(vcpu);
 		else {
-			up_read(&vcpu->kvm->slots_lock);
+			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 			kvm_vcpu_block(vcpu);
-			down_read(&vcpu->kvm->slots_lock);
+			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 			if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
 			{
 				switch(vcpu->arch.mp_state) {
@@ -3788,13 +3790,13 @@ static int __vcpu_run(struct kvm_vcpu *v
 			++vcpu->stat.signal_exits;
 		}
 		if (need_resched()) {
-			up_read(&vcpu->kvm->slots_lock);
+			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 			kvm_resched(vcpu);
-			down_read(&vcpu->kvm->slots_lock);
+			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 		}
 	}
 
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 	post_kvm_run_save(vcpu);
 
 	vapic_exit(vcpu);
@@ -3834,10 +3836,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
 		vcpu->mmio_read_completed = 1;
 		vcpu->mmio_needed = 0;
 
-		down_read(&vcpu->kvm->slots_lock);
+		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 		r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
 					EMULTYPE_NO_DECODE);
-		up_read(&vcpu->kvm->slots_lock);
+		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 		if (r == EMULATE_DO_MMIO) {
 			/*
 			 * Read-modify-write.  Back to userspace.
@@ -4575,11 +4577,12 @@ int kvm_arch_vcpu_ioctl_translate(struct
 {
 	unsigned long vaddr = tr->linear_address;
 	gpa_t gpa;
+	int idx;
 
 	vcpu_load(vcpu);
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	tr->physical_address = gpa;
 	tr->valid = gpa != UNMAPPED_GVA;
 	tr->writeable = 1;
@@ -4818,10 +4821,12 @@ fail:
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
+	int idx;
+
 	kvm_free_lapic(vcpu);
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	kvm_mmu_destroy(vcpu);
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	free_page((unsigned long)vcpu->arch.pio_data);
 }
 
Index: kvm-slotslock/include/linux/kvm_host.h
===================================================================
--- kvm-slotslock.orig/include/linux/kvm_host.h
+++ kvm-slotslock/include/linux/kvm_host.h
@@ -84,6 +84,8 @@ struct kvm_vcpu {
 	struct kvm_run *run;
 	unsigned long requests;
 	unsigned long guest_debug;
+	int srcu_idx;
+
 	int fpu_active;
 	int guest_fpu_loaded;
 	wait_queue_head_t wq;
Index: kvm-slotslock/arch/x86/kvm/mmu.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/mmu.c
+++ kvm-slotslock/arch/x86/kvm/mmu.c
@@ -2883,10 +2883,9 @@ static int mmu_shrink(int nr_to_scan, gf
 	spin_lock(&kvm_lock);
 
 	list_for_each_entry(kvm, &vm_list, vm_list) {
-		int npages;
+		int npages, idx;
 
-		if (!down_read_trylock(&kvm->slots_lock))
-			continue;
+		idx = srcu_read_lock(&kvm->srcu);
 		spin_lock(&kvm->mmu_lock);
 		npages = kvm->arch.n_alloc_mmu_pages -
 			 kvm->arch.n_free_mmu_pages;
@@ -2899,7 +2898,7 @@ static int mmu_shrink(int nr_to_scan, gf
 		nr_to_scan--;
 
 		spin_unlock(&kvm->mmu_lock);
-		up_read(&kvm->slots_lock);
+		srcu_read_unlock(&kvm->srcu, idx);
 	}
 	if (kvm_freed)
 		list_move_tail(&kvm_freed->vm_list, &vm_list);
Index: kvm-slotslock/arch/x86/kvm/vmx.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/vmx.c
+++ kvm-slotslock/arch/x86/kvm/vmx.c
@@ -2429,10 +2429,10 @@ static int vmx_vcpu_reset(struct kvm_vcp
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u64 msr;
-	int ret;
+	int ret, idx;
 
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	if (!init_rmode(vmx->vcpu.kvm)) {
 		ret = -ENOMEM;
 		goto out;
@@ -2540,7 +2540,7 @@ static int vmx_vcpu_reset(struct kvm_vcp
 	vmx->emulation_required = 0;
 
 out:
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	return ret;
 }
 
Index: kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm-slotslock.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
@@ -636,12 +636,9 @@ static void kvm_vcpu_post_transition(str
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	union context *host_ctx, *guest_ctx;
-	int r;
+	int r, idx;
 
-	/*
-	 * down_read() may sleep and return with interrupts enabled
-	 */
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 again:
 	if (signal_pending(current)) {
@@ -663,7 +660,7 @@ again:
 	if (r < 0)
 		goto vcpu_run_fail;
 
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	kvm_guest_enter();
 
 	/*
@@ -687,7 +684,7 @@ again:
 	kvm_guest_exit();
 	preempt_enable();
 
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 	r = kvm_handle_exit(kvm_run, vcpu);
 
@@ -697,10 +694,10 @@ again:
 	}
 
 out:
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	if (r > 0) {
 		kvm_resched(vcpu);
-		down_read(&vcpu->kvm->slots_lock);
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		goto again;
 	}
 



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 03/10] KVM: switch dirty_log to mmu_lock protection
  2009-09-21 23:37 ` [patch 03/10] KVM: switch dirty_log to mmu_lock protection Marcelo Tosatti
@ 2009-09-22  6:37   ` Avi Kivity
  2009-09-22 12:44     ` Marcelo Tosatti
  0 siblings, 1 reply; 26+ messages in thread
From: Avi Kivity @ 2009-09-22  6:37 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On 09/22/2009 02:37 AM, Marcelo Tosatti wrote:
> get_dirty_log vs mark_page_dirty need to be mutually exclusive. Switch
> to mmu_lock protection.
>
>    

I'm not sure all archs use mmu_lock.  Also, I'm unhappy about 
introducing more use (especially as it's often unnecessary, if dirty 
logging is turned off for the slot for example).

I think you can use rcu for this as well.  When you read the log, 
allocate a new empty bitmap, switch the memslots pointer to include it, 
and synchronize_srcu().  Now we are certain everyone is using the new 
bitmap we can copy the old one to usespace and delete it.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 04/10] KVM: split kvm_arch_set_memory_region into prepare and commit
  2009-09-21 23:37 ` [patch 04/10] KVM: split kvm_arch_set_memory_region into prepare and commit Marcelo Tosatti
@ 2009-09-22  6:40   ` Avi Kivity
  0 siblings, 0 replies; 26+ messages in thread
From: Avi Kivity @ 2009-09-22  6:40 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On 09/22/2009 02:37 AM, Marcelo Tosatti wrote:
> Required for SRCU convertion later.
>
>    

My feeling is that prepare/commit can be common code and only some small 
bits (like allocating x86 rmaps) can be arch specific.  That's for 
another patchset though.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update
  2009-09-21 23:37 ` [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update Marcelo Tosatti
@ 2009-09-22  6:59   ` Avi Kivity
  2009-09-22 16:16     ` Marcelo Tosatti
  2009-09-22 10:40   ` Fernando Carrijo
  2009-09-24 14:06   ` Marcelo Tosatti
  2 siblings, 1 reply; 26+ messages in thread
From: Avi Kivity @ 2009-09-22  6:59 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On 09/22/2009 02:37 AM, Marcelo Tosatti wrote:
> Use two steps for memslot deletion: mark the slot invalid (which stops
> instantiation of new shadow pages for that slot, but allows destruction),
> then instantiate the new empty slot.
>
> Also simplifies kvm_handle_hva locking.
>
>   unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
>   {
> -	int i;
> +	int i, idx;
>   	unsigned int nr_mmu_pages;
>   	unsigned int  nr_pages = 0;
> +	struct kvm_memslots *slots;
>
> -	for (i = 0; i<  kvm->memslots->nmemslots; i++)
> -		nr_pages += kvm->memslots->memslots[i].npages;
> +	idx = srcu_read_lock(&kvm->srcu);
>    

Doesn't the caller hold the srcu_read_lock() here?

>
> Index: kvm-slotslock/arch/x86/kvm/vmx.c
> ===================================================================
> --- kvm-slotslock.orig/arch/x86/kvm/vmx.c
> +++ kvm-slotslock/arch/x86/kvm/vmx.c
> @@ -24,6 +24,7 @@
>   #include<linux/mm.h>
>   #include<linux/highmem.h>
>   #include<linux/sched.h>
> +#include<linux/srcu.h>
>   #include<linux/moduleparam.h>
>   #include<linux/ftrace_event.h>
>   #include "kvm_cache_regs.h"
> @@ -1465,10 +1466,18 @@ static void enter_pmode(struct kvm_vcpu
>   static gva_t rmode_tss_base(struct kvm *kvm)
>   {
>   	if (!kvm->arch.tss_addr) {
> -		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
> -				 kvm->memslots->memslots[0].npages - 3;
> +		struct kvm_memslots *slots;
> +		gfn_t base_gfn;
> +		int idx;
> +
> +		idx = srcu_read_lock(&kvm->srcu);
> +		slots = rcu_dereference(kvm->memslots);
> + 		base_gfn = slots->memslots[0].base_gfn +
> +				 slots->memslots[0].npages - 3;
> +		srcu_read_unlock(&kvm->srcu, idx);
>   		return base_gfn<<  PAGE_SHIFT;
>   	}
> +
>    

And here?  Maybe we should take the srcu_lock in vcpu_load/put and only 
drop in when going into vcpu context or explicitly sleeping, just to 
simplify things.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 08/10] KVM: x86: switch kvm_set_memory_alias to SRCU update
  2009-09-21 23:37 ` [patch 08/10] KVM: x86: switch kvm_set_memory_alias " Marcelo Tosatti
@ 2009-09-22  7:04   ` Avi Kivity
  0 siblings, 0 replies; 26+ messages in thread
From: Avi Kivity @ 2009-09-22  7:04 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On 09/22/2009 02:37 AM, Marcelo Tosatti wrote:
> Using a similar two-step procedure as for memslots.
>
>
>
> -	gfn = unalias_gfn(kvm, gfn);
> +	gfn = unalias_gfn(kvm, gfn, false);
>    

To improve readability suggest two names, maybe unalias_gfn() and 
unalias_gfn_instantiation().  boolean parameters are hard to read at the 
call site.

Also we've discussed in the past converting aliases to private slots 
(and also echo aliases >> feature-removal-schedule.txt); maybe that is 
an easier way of verifying everything still works.


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 10/10] KVM: switch vcpu context to use SRCU
  2009-09-21 23:37 ` [patch 10/10] KVM: switch vcpu context to use SRCU Marcelo Tosatti
@ 2009-09-22  7:07   ` Avi Kivity
  0 siblings, 0 replies; 26+ messages in thread
From: Avi Kivity @ 2009-09-22  7:07 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On 09/22/2009 02:37 AM, Marcelo Tosatti wrote:
> Signed-off-by: Marcelo Tosatti<mtosatti@redhat.com>
>
>    

I'd like an eleventh patch that converts slots_lock to a mutex; there 
should be no readers left, yes?

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 00/10] RFC: switch vcpu context to use SRCU
  2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
                   ` (9 preceding siblings ...)
  2009-09-21 23:37 ` [patch 10/10] KVM: switch vcpu context to use SRCU Marcelo Tosatti
@ 2009-09-22  7:09 ` Avi Kivity
  10 siblings, 0 replies; 26+ messages in thread
From: Avi Kivity @ 2009-09-22  7:09 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On 09/22/2009 02:37 AM, Marcelo Tosatti wrote:
> Improves vmexit latency by ~= 13% on UP guest.
>
> Still requires decent stress testing and careful review, but posting for
> early comments.
>    

Okay, looks very nice.  The only real problem I see  is mmu_lock for 
mark_dirty(), the other comments are more or less cosmetic.

Please copy the kvm-arch lists and maintainers on the next round so they 
can review this as well.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update
  2009-09-21 23:37 ` [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update Marcelo Tosatti
  2009-09-22  6:59   ` Avi Kivity
@ 2009-09-22 10:40   ` Fernando Carrijo
  2009-09-22 12:55     ` Marcelo Tosatti
  2009-09-24 14:06   ` Marcelo Tosatti
  2 siblings, 1 reply; 26+ messages in thread
From: Fernando Carrijo @ 2009-09-22 10:40 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, avi

Resending with Cc: added

On Mon, 2009-09-21 at 20:37 -0300, Marcelo Tosatti wrote:

> plain text document attachment (introduce-srcu-and-use-for-slots)
> Use two steps for memslot deletion: mark the slot invalid (which stops 
> instantiation of new shadow pages for that slot, but allows destruction),
> then instantiate the new empty slot.
> 
> Also simplifies kvm_handle_hva locking.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> Index: kvm-slotslock/include/linux/kvm.h
> ===================================================================
> --- kvm-slotslock.orig/include/linux/kvm.h
> +++ kvm-slotslock/include/linux/kvm.h
> @@ -39,7 +39,7 @@ struct kvm_userspace_memory_region {
>  
>  /* for kvm_memory_region::flags */
>  #define KVM_MEM_LOG_DIRTY_PAGES  1UL
> -
> +#define KVM_MEMSLOT_INVALID      (1UL << 1)
>  
>  /* for KVM_IRQ_LINE */
>  struct kvm_irq_level {
> Index: kvm-slotslock/virt/kvm/kvm_main.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/kvm_main.c
> +++ kvm-slotslock/virt/kvm/kvm_main.c
> @@ -43,6 +43,7 @@
>  #include <linux/swap.h>
>  #include <linux/bitops.h>
>  #include <linux/spinlock.h>
> +#include <linux/srcu.h>
>  
>  #include <asm/processor.h>
>  #include <asm/io.h>
> @@ -352,11 +353,15 @@ static struct kvm *kvm_create_vm(void)
>  	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
>  	if (!kvm->memslots)
>  		goto out_err;
> +	if (init_srcu_struct(&kvm->srcu))
> +		goto out_err;
>  
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>  	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> -	if (!page)
> +	if (!page) {
> +		cleanup_srcu_struct(&kvm->srcu);
>  		goto out_err;
> +	}
>  
>  	kvm->coalesced_mmio_ring =
>  			(struct kvm_coalesced_mmio_ring *)page_address(page);
> @@ -367,6 +372,7 @@ static struct kvm *kvm_create_vm(void)
>  		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
>  		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
>  		if (r) {
> +			cleanup_srcu_struct(&kvm->srcu);
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>  			put_page(page);
>  #endif
> @@ -462,6 +468,7 @@ static void kvm_destroy_vm(struct kvm *k
>  	kvm_arch_flush_shadow(kvm);
>  #endif
>  	kvm_arch_destroy_vm(kvm);
> +	cleanup_srcu_struct(&kvm->srcu);
>  	hardware_disable_all();
>  	mmdrop(mm);
>  }
> @@ -502,12 +509,13 @@ int __kvm_set_memory_region(struct kvm *
>  			    struct kvm_userspace_memory_region *mem,
>  			    int user_alloc)
>  {
> -	int r;
> +	int r, flush_shadow = 0;
>  	gfn_t base_gfn;
>  	unsigned long npages;
>  	unsigned long i;
>  	struct kvm_memory_slot *memslot;
>  	struct kvm_memory_slot old, new;
> +	struct kvm_memslots *slots, *old_memslots;
>  
>  	r = -EINVAL;
>  	/* General sanity checks */
> @@ -569,15 +577,7 @@ int __kvm_set_memory_region(struct kvm *
>  		memset(new.rmap, 0, npages * sizeof(*new.rmap));
>  
>  		new.user_alloc = user_alloc;
> -		/*
> -		 * hva_to_rmmap() serialzies with the mmu_lock and to be
> -		 * safe it has to ignore memslots with !user_alloc &&
> -		 * !userspace_addr.
> -		 */
> -		if (user_alloc)
> -			new.userspace_addr = mem->userspace_addr;
> -		else
> -			new.userspace_addr = 0;
> +		new.userspace_addr = mem->userspace_addr;
>  	}
>  	if (!npages)
>  		goto skip_lpage;
> @@ -632,8 +632,9 @@ skip_lpage:
>  		if (!new.dirty_bitmap)
>  			goto out_free;
>  		memset(new.dirty_bitmap, 0, dirty_bytes);
> +		/* destroy any largepage mappings for dirty tracking */
>  		if (old.npages)
> -			kvm_arch_flush_shadow(kvm);
> +			flush_shadow = 1;
>  	}
>  #else  /* not defined CONFIG_S390 */
>  	new.user_alloc = user_alloc;
> @@ -641,34 +642,69 @@ skip_lpage:
>  		new.userspace_addr = mem->userspace_addr;
>  #endif /* not defined CONFIG_S390 */
>  
> -	if (!npages)
> +	if (!npages) {
> +		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +		if (!slots)
> +			goto out_free;
> +		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));

Nothing wrong with the above line, but it makes me think if

                  *slots = *kvm->memslots;

would save us the function call overhead

> +		if (mem->slot >= slots->nmemslots)
> +			slots->nmemslots = mem->slot + 1;
> +		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
> +
> +		old_memslots = kvm->memslots;
> +		rcu_assign_pointer(kvm->memslots, slots);
> +		synchronize_srcu(&kvm->srcu);
> +		/* From this point no new shadow pages pointing to a deleted
> +		 * memslot will be created.
> +         	 *
> +         	 * validation of sp->gfn happens in:
> +         	 * 	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
> +         	 * 	- kvm_is_visible_gfn (mmu_check_roots)
> +         	 */
>  		kvm_arch_flush_shadow(kvm);
> +		kfree(old_memslots);
> +	}
>  
>  	r = kvm_arch_prepare_memory_region(kvm, &new, old, user_alloc);
>  	if (r)
>  		goto out_free;
>  
> -	spin_lock(&kvm->mmu_lock);
> -	if (mem->slot >= kvm->memslots->nmemslots)
> -		kvm->memslots->nmemslots = mem->slot + 1;
> +#ifdef CONFIG_DMAR
> +	/* map the pages in iommu page table */
> +	if (npages)
> +		r = kvm_iommu_map_pages(kvm, &new);
> +		if (r)
> +			goto out_free;
> +#endif
>  
> -	*memslot = new;
> -	spin_unlock(&kvm->mmu_lock);
> +	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +	if (!slots)
> +		goto out_free;
> +	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));

Ditto

Cheers,

Fernando Carrijo.

> +	if (mem->slot >= slots->nmemslots)
> +		slots->nmemslots = mem->slot + 1;
> +
> +	/* actual memory is freed via old in kvm_free_physmem_slot below */
> +	if (!npages) {
> +		new.rmap = NULL;
> +		new.dirty_bitmap = NULL;
> +		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
> +			new.lpage_info[i] = NULL;
> +	}
> +
> +	slots->memslots[mem->slot] = new;
> +	old_memslots = kvm->memslots;
> +	rcu_assign_pointer(kvm->memslots, slots);
> +	synchronize_srcu(&kvm->srcu);
>  
>  	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
>  
> -	kvm_free_physmem_slot(&old, npages ? &new : NULL);
> -	/* Slot deletion case: we have to update the current slot */
> -	spin_lock(&kvm->mmu_lock);
> -	if (!npages)
> -		*memslot = old;
> -	spin_unlock(&kvm->mmu_lock);
> -#ifdef CONFIG_DMAR
> -	/* map the pages in iommu page table */
> -	r = kvm_iommu_map_pages(kvm, memslot);
> -	if (r)
> -		goto out;
> -#endif
> +	kvm_free_physmem_slot(&old, &new);
> +	kfree(old_memslots);
> +
> +	if (flush_shadow)
> +		kvm_arch_flush_shadow(kvm);
> +
>  	return 0;
>  
>  out_free:
> @@ -768,7 +804,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
>  struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
>  {
>  	int i;
> -	struct kvm_memslots *slots = kvm->memslots;
> +	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
>  
>  	for (i = 0; i < slots->nmemslots; ++i) {
>  		struct kvm_memory_slot *memslot = &slots->memslots[i];
> @@ -790,12 +826,15 @@ struct kvm_memory_slot *gfn_to_memslot(s
>  int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
>  {
>  	int i;
> -	struct kvm_memslots *slots = kvm->memslots;
> +	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
>  
>  	gfn = unalias_gfn(kvm, gfn);
>  	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
>  		struct kvm_memory_slot *memslot = &slots->memslots[i];
>  
> +		if (memslot->flags & KVM_MEMSLOT_INVALID)
> +			continue;
> +
>  		if (gfn >= memslot->base_gfn
>  		    && gfn < memslot->base_gfn + memslot->npages)
>  			return 1;
> @@ -810,7 +849,7 @@ unsigned long gfn_to_hva(struct kvm *kvm
>  
>  	gfn = unalias_gfn(kvm, gfn);
>  	slot = gfn_to_memslot_unaliased(kvm, gfn);
> -	if (!slot)
> +	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
>  		return bad_hva();
>  	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
>  }
> Index: kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
> ===================================================================
> --- kvm-slotslock.orig/arch/ia64/kvm/kvm-ia64.c
> +++ kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
> @@ -1834,6 +1834,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
>  	struct kvm_memory_slot *memslot;
>  	int is_dirty = 0;
>  
> +	down_write(&kvm->slots_lock);
>  	spin_lock(&kvm->arch.dirty_log_lock);
>  
>  	r = kvm_ia64_sync_dirty_log(kvm, log);
> @@ -1853,6 +1854,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
>  	}
>  	r = 0;
>  out:
> +	up_write(&kvm->slots_lock);
>  	spin_unlock(&kvm->arch.dirty_log_lock);
>  	return r;
>  }
> Index: kvm-slotslock/arch/x86/kvm/mmu.c
> ===================================================================
> --- kvm-slotslock.orig/arch/x86/kvm/mmu.c
> +++ kvm-slotslock/arch/x86/kvm/mmu.c
> @@ -29,6 +29,7 @@
>  #include <linux/swap.h>
>  #include <linux/hugetlb.h>
>  #include <linux/compiler.h>
> +#include <linux/srcu.h>
>  
>  #include <asm/page.h>
>  #include <asm/cmpxchg.h>
> @@ -766,23 +767,18 @@ static int kvm_unmap_rmapp(struct kvm *k
>  static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
>  			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
>  {
> -	int i, j;
> +	int i, j, idx;
>  	int retval = 0;
> -	struct kvm_memslots *slots = kvm->memslots;
> +	struct kvm_memslots *slots;
> +
> +	idx = srcu_read_lock(&kvm->srcu);
> +	slots = rcu_dereference(kvm->memslots);
>  
> -	/*
> -	 * If mmap_sem isn't taken, we can look the memslots with only
> -	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
> -	 */
>  	for (i = 0; i < slots->nmemslots; i++) {
>  		struct kvm_memory_slot *memslot = &slots->memslots[i];
>  		unsigned long start = memslot->userspace_addr;
>  		unsigned long end;
>  
> -		/* mmu_lock protects userspace_addr */
> -		if (!start)
> -			continue;
> -
>  		end = start + (memslot->npages << PAGE_SHIFT);
>  		if (hva >= start && hva < end) {
>  			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
> @@ -797,6 +793,7 @@ static int kvm_handle_hva(struct kvm *kv
>  			}
>  		}
>  	}
> +	srcu_read_unlock(&kvm->srcu, idx);
>  
>  	return retval;
>  }
> @@ -2966,16 +2963,20 @@ nomem:
>   */
>  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
>  {
> -	int i;
> +	int i, idx;
>  	unsigned int nr_mmu_pages;
>  	unsigned int  nr_pages = 0;
> +	struct kvm_memslots *slots;
>  
> -	for (i = 0; i < kvm->memslots->nmemslots; i++)
> -		nr_pages += kvm->memslots->memslots[i].npages;
> +	idx = srcu_read_lock(&kvm->srcu);
> +	slots = rcu_dereference(kvm->memslots);
> +	for (i = 0; i < slots->nmemslots; i++)
> +		nr_pages += slots->memslots[i].npages;
>  
>  	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
>  	nr_mmu_pages = max(nr_mmu_pages,
>  			(unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
> +	srcu_read_unlock(&kvm->srcu, idx);
>  
>  	return nr_mmu_pages;
>  }
> @@ -3241,10 +3242,12 @@ static void audit_mappings(struct kvm_vc
>  static int count_rmaps(struct kvm_vcpu *vcpu)
>  {
>  	int nmaps = 0;
> -	int i, j, k;
> +	int i, j, k, idx;
>  
> +	idx = srcu_read_lock(&kvm->srcu);
> +	slots = rcu_dereference(kvm->memslots);
>  	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
> -		struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i];
> +		struct kvm_memory_slot *m = &slots->memslots[i];
>  		struct kvm_rmap_desc *d;
>  
>  		for (j = 0; j < m->npages; ++j) {
> @@ -3267,6 +3270,7 @@ static int count_rmaps(struct kvm_vcpu *
>  			}
>  		}
>  	}
> +	srcu_read_unlock(&kvm->srcu, idx);
>  	return nmaps;
>  }
>  
> Index: kvm-slotslock/arch/x86/kvm/vmx.c
> ===================================================================
> --- kvm-slotslock.orig/arch/x86/kvm/vmx.c
> +++ kvm-slotslock/arch/x86/kvm/vmx.c
> @@ -24,6 +24,7 @@
>  #include <linux/mm.h>
>  #include <linux/highmem.h>
>  #include <linux/sched.h>
> +#include <linux/srcu.h>
>  #include <linux/moduleparam.h>
>  #include <linux/ftrace_event.h>
>  #include "kvm_cache_regs.h"
> @@ -1465,10 +1466,18 @@ static void enter_pmode(struct kvm_vcpu 
>  static gva_t rmode_tss_base(struct kvm *kvm)
>  {
>  	if (!kvm->arch.tss_addr) {
> -		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
> -				 kvm->memslots->memslots[0].npages - 3;
> +		struct kvm_memslots *slots;
> +		gfn_t base_gfn;
> +		int idx;
> +
> +		idx = srcu_read_lock(&kvm->srcu);
> +		slots = rcu_dereference(kvm->memslots);
> + 		base_gfn = slots->memslots[0].base_gfn +
> +				 slots->memslots[0].npages - 3;
> +		srcu_read_unlock(&kvm->srcu, idx);
>  		return base_gfn << PAGE_SHIFT;
>  	}
> +
>  	return kvm->arch.tss_addr;
>  }
>  
> Index: kvm-slotslock/include/linux/kvm_host.h
> ===================================================================
> --- kvm-slotslock.orig/include/linux/kvm_host.h
> +++ kvm-slotslock/include/linux/kvm_host.h
> @@ -163,6 +163,7 @@ struct kvm {
>  	struct rw_semaphore slots_lock;
>  	struct mm_struct *mm; /* userspace tied to this vm */
>  	struct kvm_memslots *memslots;
> +	struct srcu_struct srcu;
>  #ifdef CONFIG_KVM_APIC_ARCHITECTURE
>  	u32 bsp_vcpu_id;
>  	struct kvm_vcpu *bsp_vcpu;
> Index: kvm-slotslock/virt/kvm/assigned-dev.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/assigned-dev.c
> +++ kvm-slotslock/virt/kvm/assigned-dev.c
> @@ -504,11 +504,11 @@ out:
>  static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
>  				      struct kvm_assigned_pci_dev *assigned_dev)
>  {
> -	int r = 0;
> +	int r = 0, idx;
>  	struct kvm_assigned_dev_kernel *match;
>  	struct pci_dev *dev;
>  
> -	down_read(&kvm->slots_lock);
> +	idx = srcu_read_lock(&kvm->srcu);
>  	mutex_lock(&kvm->lock);
>  
>  	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> @@ -574,7 +574,7 @@ static int kvm_vm_ioctl_assign_device(st
>  
>  out:
>  	mutex_unlock(&kvm->lock);
> -	up_read(&kvm->slots_lock);
> +	srcu_read_unlock(&kvm->srcu, idx);
>  	return r;
>  out_list_del:
>  	list_del(&match->list);
> @@ -586,7 +586,7 @@ out_put:
>  out_free:
>  	kfree(match);
>  	mutex_unlock(&kvm->lock);
> -	up_read(&kvm->slots_lock);
> +	srcu_read_unlock(&kvm->srcu, idx);
>  	return r;
>  }
>  
> Index: kvm-slotslock/virt/kvm/iommu.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/iommu.c
> +++ kvm-slotslock/virt/kvm/iommu.c
> @@ -78,7 +78,7 @@ static int kvm_iommu_map_memslots(struct
>  	int i, r = 0;
>  	struct kvm_memslots *slots;
>  
> -	slots = kvm->memslots;
> +	slots = rcu_dereference(kvm->memslots);
>  
>  	for (i = 0; i < slots->nmemslots; i++) {
>  		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
> @@ -214,7 +214,7 @@ static int kvm_iommu_unmap_memslots(stru
>  	int i;
>  	struct kvm_memslots *slots;
>  
> -	slots = kvm->memslots;
> +	slots = rcu_dereference(kvm->memslots);
>  
>  	for (i = 0; i < slots->nmemslots; i++) {
>  		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 03/10] KVM: switch dirty_log to mmu_lock protection
  2009-09-22  6:37   ` Avi Kivity
@ 2009-09-22 12:44     ` Marcelo Tosatti
  2009-09-22 12:52       ` Avi Kivity
  0 siblings, 1 reply; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-22 12:44 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Tue, Sep 22, 2009 at 09:37:51AM +0300, Avi Kivity wrote:
> On 09/22/2009 02:37 AM, Marcelo Tosatti wrote:
>> get_dirty_log vs mark_page_dirty need to be mutually exclusive. Switch
>> to mmu_lock protection.
>>
>>    
>
> I'm not sure all archs use mmu_lock.  Also, I'm unhappy about  
> introducing more use (especially as it's often unnecessary, if dirty  
> logging is turned off for the slot for example).

True.

> I think you can use rcu for this as well.  When you read the log,  
> allocate a new empty bitmap, switch the memslots pointer to include it,  
> and synchronize_srcu().  Now we are certain everyone is using the new  
> bitmap we can copy the old one to usespace and delete it.

This will slow down get_dirty_log, which can lead to regressions in 
migration and poorer vga refresh updates.

Moreover, since updates are running concurrently, at the time
synchronize_srcu returns you can have plenty of dirty data in the new
bitmap which is not the case with mutual exclusion.

This are the reasons for the choice. Would you prefer a new rw lock?


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 03/10] KVM: switch dirty_log to mmu_lock protection
  2009-09-22 12:44     ` Marcelo Tosatti
@ 2009-09-22 12:52       ` Avi Kivity
  0 siblings, 0 replies; 26+ messages in thread
From: Avi Kivity @ 2009-09-22 12:52 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm

On 09/22/2009 03:44 PM, Marcelo Tosatti wrote:
>
>> I think you can use rcu for this as well.  When you read the log,
>> allocate a new empty bitmap, switch the memslots pointer to include it,
>> and synchronize_srcu().  Now we are certain everyone is using the new
>> bitmap we can copy the old one to usespace and delete it.
>>      
> This will slow down get_dirty_log, which can lead to regressions in
> migration and poorer vga refresh updates.
>    

That's true.  By how much? srcu is not so slow that a 30Hz refresh rate 
would notice.

> Moreover, since updates are running concurrently, at the time
> synchronize_srcu returns you can have plenty of dirty data in the new
> bitmap which is not the case with mutual exclusion.
>    

By the time ioctl() returns you can have dirty data in the new bitmap.  
Nothing wrong with that.

> This are the reasons for the choice. Would you prefer a new rw lock?
>    

Prefer the srcu thing unless convinced it is too slow.  Alternatively, 
we can copy-and-clear the bitmap using cmpxchg (which won't lose data) 
to a temporary buffer, then to userspace.  Locks are unfashionable.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update
  2009-09-22 10:40   ` Fernando Carrijo
@ 2009-09-22 12:55     ` Marcelo Tosatti
  0 siblings, 0 replies; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-22 12:55 UTC (permalink / raw)
  To: Fernando Carrijo; +Cc: kvm, avi

On Tue, Sep 22, 2009 at 07:40:10AM -0300, Fernando Carrijo wrote:
> Resending with Cc: added
> 
> On Mon, 2009-09-21 at 20:37 -0300, Marcelo Tosatti wrote:
> 
> > -			kvm_arch_flush_shadow(kvm);
> > +			flush_shadow = 1;
> >  	}
> >  #else  /* not defined CONFIG_S390 */
> >  	new.user_alloc = user_alloc;
> > @@ -641,34 +642,69 @@ skip_lpage:
> >  		new.userspace_addr = mem->userspace_addr;
> >  #endif /* not defined CONFIG_S390 */
> >  
> > -	if (!npages)
> > +	if (!npages) {
> > +		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> > +		if (!slots)
> > +			goto out_free;
> > +		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> 
> Nothing wrong with the above line, but it makes me think if
> 
>                   *slots = *kvm->memslots;
> 
> would save us the function call overhead

Perhaps. But this is a slow path anyway, so it does not matter much.

Thanks for the review.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update
  2009-09-22  6:59   ` Avi Kivity
@ 2009-09-22 16:16     ` Marcelo Tosatti
  0 siblings, 0 replies; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-22 16:16 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Tue, Sep 22, 2009 at 09:59:04AM +0300, Avi Kivity wrote:
> On 09/22/2009 02:37 AM, Marcelo Tosatti wrote:
>> Use two steps for memslot deletion: mark the slot invalid (which stops
>> instantiation of new shadow pages for that slot, but allows destruction),
>> then instantiate the new empty slot.
>>
>> Also simplifies kvm_handle_hva locking.
>>
>>   unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
>>   {
>> -	int i;
>> +	int i, idx;
>>   	unsigned int nr_mmu_pages;
>>   	unsigned int  nr_pages = 0;
>> +	struct kvm_memslots *slots;
>>
>> -	for (i = 0; i<  kvm->memslots->nmemslots; i++)
>> -		nr_pages += kvm->memslots->memslots[i].npages;
>> +	idx = srcu_read_lock(&kvm->srcu);
>>    
>
> Doesn't the caller hold the srcu_read_lock() here?

No:

kvm_vm_ioctl_set_nr_mmu_pages -> kvm_mmu_change_mmu_pages

And even if the caller did, recursive "locking" is tolerated.

>> Index: kvm-slotslock/arch/x86/kvm/vmx.c
>> ===================================================================
>> --- kvm-slotslock.orig/arch/x86/kvm/vmx.c
>> +++ kvm-slotslock/arch/x86/kvm/vmx.c
>> @@ -24,6 +24,7 @@
>>   #include<linux/mm.h>
>>   #include<linux/highmem.h>
>>   #include<linux/sched.h>
>> +#include<linux/srcu.h>
>>   #include<linux/moduleparam.h>
>>   #include<linux/ftrace_event.h>
>>   #include "kvm_cache_regs.h"
>> @@ -1465,10 +1466,18 @@ static void enter_pmode(struct kvm_vcpu
>>   static gva_t rmode_tss_base(struct kvm *kvm)
>>   {
>>   	if (!kvm->arch.tss_addr) {
>> -		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
>> -				 kvm->memslots->memslots[0].npages - 3;
>> +		struct kvm_memslots *slots;
>> +		gfn_t base_gfn;
>> +		int idx;
>> +
>> +		idx = srcu_read_lock(&kvm->srcu);
>> +		slots = rcu_dereference(kvm->memslots);
>> + 		base_gfn = slots->memslots[0].base_gfn +
>> +				 slots->memslots[0].npages - 3;
>> +		srcu_read_unlock(&kvm->srcu, idx);
>>   		return base_gfn<<  PAGE_SHIFT;
>>   	}
>> +
>>    
>
> And here?  

kvm_arch_vcpu_ioctl_set_sregs -> kvm_x86_ops->set_cr0.

> Maybe we should take the srcu_lock in vcpu_load/put and only  
> drop in when going into vcpu context or explicitly sleeping, just to  
> simplify things.

Hum, possible, but i'd rather leave it for later.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update
  2009-09-21 23:37 ` [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update Marcelo Tosatti
  2009-09-22  6:59   ` Avi Kivity
  2009-09-22 10:40   ` Fernando Carrijo
@ 2009-09-24 14:06   ` Marcelo Tosatti
  2009-09-24 17:28     ` Paul E. McKenney
  2 siblings, 1 reply; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-24 14:06 UTC (permalink / raw)
  To: kvm, Paul E. McKenney; +Cc: avi

On Mon, Sep 21, 2009 at 08:37:18PM -0300, Marcelo Tosatti wrote:
> Use two steps for memslot deletion: mark the slot invalid (which stops 
> instantiation of new shadow pages for that slot, but allows destruction),
> then instantiate the new empty slot.
> 
> Also simplifies kvm_handle_hva locking.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 

<snip>

> -	if (!npages)
> +	if (!npages) {
> +		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +		if (!slots)
> +			goto out_free;
> +		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> +		if (mem->slot >= slots->nmemslots)
> +			slots->nmemslots = mem->slot + 1;
> +		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
> +
> +		old_memslots = kvm->memslots;
> +		rcu_assign_pointer(kvm->memslots, slots);
> +		synchronize_srcu(&kvm->srcu);
> +		/* From this point no new shadow pages pointing to a deleted
> +		 * memslot will be created.
> +         	 *
> +         	 * validation of sp->gfn happens in:
> +         	 * 	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
> +         	 * 	- kvm_is_visible_gfn (mmu_check_roots)
> +         	 */
>  		kvm_arch_flush_shadow(kvm);
> +		kfree(old_memslots);
> +	}
>  
>  	r = kvm_arch_prepare_memory_region(kvm, &new, old, user_alloc);
>  	if (r)
>  		goto out_free;
>  
> -	spin_lock(&kvm->mmu_lock);
> -	if (mem->slot >= kvm->memslots->nmemslots)
> -		kvm->memslots->nmemslots = mem->slot + 1;
> +#ifdef CONFIG_DMAR
> +	/* map the pages in iommu page table */
> +	if (npages)
> +		r = kvm_iommu_map_pages(kvm, &new);
> +		if (r)
> +			goto out_free;
> +#endif
>  
> -	*memslot = new;
> -	spin_unlock(&kvm->mmu_lock);
> +	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +	if (!slots)
> +		goto out_free;
> +	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> +	if (mem->slot >= slots->nmemslots)
> +		slots->nmemslots = mem->slot + 1;
> +
> +	/* actual memory is freed via old in kvm_free_physmem_slot below */
> +	if (!npages) {
> +		new.rmap = NULL;
> +		new.dirty_bitmap = NULL;
> +		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
> +			new.lpage_info[i] = NULL;
> +	}
> +
> +	slots->memslots[mem->slot] = new;
> +	old_memslots = kvm->memslots;
> +	rcu_assign_pointer(kvm->memslots, slots);
> +	synchronize_srcu(&kvm->srcu);
>  
>  	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);

Paul,

There is a scenario where this path, which updates KVM memory slots, is
called relatively often.

Each synchronize_srcu() call takes about 10ms (avg 3ms per
synchronize_sched call), so this is hurting us.

Is this expected? Is there any possibility for synchronize_srcu()
optimization?

There are other sides we can work on, such as reducing the memory slot 
updates, but i'm wondering what can be done regarding SRCU itself.

TIA


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update
  2009-09-24 14:06   ` Marcelo Tosatti
@ 2009-09-24 17:28     ` Paul E. McKenney
  2009-09-24 18:05       ` Marcelo Tosatti
  2009-09-25 15:05       ` Avi Kivity
  0 siblings, 2 replies; 26+ messages in thread
From: Paul E. McKenney @ 2009-09-24 17:28 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, avi

On Thu, Sep 24, 2009 at 11:06:51AM -0300, Marcelo Tosatti wrote:
> On Mon, Sep 21, 2009 at 08:37:18PM -0300, Marcelo Tosatti wrote:
> > Use two steps for memslot deletion: mark the slot invalid (which stops 
> > instantiation of new shadow pages for that slot, but allows destruction),
> > then instantiate the new empty slot.
> > 
> > Also simplifies kvm_handle_hva locking.
> > 
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> > 
> 
> <snip>
> 
> > -	if (!npages)
> > +	if (!npages) {
> > +		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> > +		if (!slots)
> > +			goto out_free;
> > +		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> > +		if (mem->slot >= slots->nmemslots)
> > +			slots->nmemslots = mem->slot + 1;
> > +		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
> > +
> > +		old_memslots = kvm->memslots;
> > +		rcu_assign_pointer(kvm->memslots, slots);
> > +		synchronize_srcu(&kvm->srcu);
> > +		/* From this point no new shadow pages pointing to a deleted
> > +		 * memslot will be created.
> > +         	 *
> > +         	 * validation of sp->gfn happens in:
> > +         	 * 	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
> > +         	 * 	- kvm_is_visible_gfn (mmu_check_roots)
> > +         	 */
> >  		kvm_arch_flush_shadow(kvm);
> > +		kfree(old_memslots);
> > +	}
> >  
> >  	r = kvm_arch_prepare_memory_region(kvm, &new, old, user_alloc);
> >  	if (r)
> >  		goto out_free;
> >  
> > -	spin_lock(&kvm->mmu_lock);
> > -	if (mem->slot >= kvm->memslots->nmemslots)
> > -		kvm->memslots->nmemslots = mem->slot + 1;
> > +#ifdef CONFIG_DMAR
> > +	/* map the pages in iommu page table */
> > +	if (npages)
> > +		r = kvm_iommu_map_pages(kvm, &new);
> > +		if (r)
> > +			goto out_free;
> > +#endif
> >  
> > -	*memslot = new;
> > -	spin_unlock(&kvm->mmu_lock);
> > +	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> > +	if (!slots)
> > +		goto out_free;
> > +	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> > +	if (mem->slot >= slots->nmemslots)
> > +		slots->nmemslots = mem->slot + 1;
> > +
> > +	/* actual memory is freed via old in kvm_free_physmem_slot below */
> > +	if (!npages) {
> > +		new.rmap = NULL;
> > +		new.dirty_bitmap = NULL;
> > +		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
> > +			new.lpage_info[i] = NULL;
> > +	}
> > +
> > +	slots->memslots[mem->slot] = new;
> > +	old_memslots = kvm->memslots;
> > +	rcu_assign_pointer(kvm->memslots, slots);
> > +	synchronize_srcu(&kvm->srcu);
> >  
> >  	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
> 
> Paul,
> 
> There is a scenario where this path, which updates KVM memory slots, is
> called relatively often.
> 
> Each synchronize_srcu() call takes about 10ms (avg 3ms per
> synchronize_sched call), so this is hurting us.
> 
> Is this expected? Is there any possibility for synchronize_srcu()
> optimization?
> 
> There are other sides we can work on, such as reducing the memory slot 
> updates, but i'm wondering what can be done regarding SRCU itself.

This is expected behavior, but there is a possible fix currently
in mainline (Linus's git tree).  The idea would be to create a
synchronize_srcu_expedited(), which starts with synchronize_srcu(), and
replaces the synchronize_sched() calls with synchronize_sched_expedited().

This could potentially reduce the overall synchronize_srcu() latency
to well under a microsecond.  The price to be paid is that each instance
of synchronize_sched_expedited() IPIs all the online CPUs, and awakens
the migration thread on each.

Would this approach likely work for you?

							Thanx, Paul

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update
  2009-09-24 17:28     ` Paul E. McKenney
@ 2009-09-24 18:05       ` Marcelo Tosatti
  2009-09-25 15:05       ` Avi Kivity
  1 sibling, 0 replies; 26+ messages in thread
From: Marcelo Tosatti @ 2009-09-24 18:05 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: kvm, avi

On Thu, Sep 24, 2009 at 10:28:41AM -0700, Paul E. McKenney wrote:
> > Paul,
> > 
> > There is a scenario where this path, which updates KVM memory slots, is
> > called relatively often.
> > 
> > Each synchronize_srcu() call takes about 10ms (avg 3ms per
> > synchronize_sched call), so this is hurting us.
> > 
> > Is this expected? Is there any possibility for synchronize_srcu()
> > optimization?
> > 
> > There are other sides we can work on, such as reducing the memory slot 
> > updates, but i'm wondering what can be done regarding SRCU itself.
> 
> This is expected behavior, but there is a possible fix currently
> in mainline (Linus's git tree).  The idea would be to create a
> synchronize_srcu_expedited(), which starts with synchronize_srcu(), and
> replaces the synchronize_sched() calls with synchronize_sched_expedited().
> 
> This could potentially reduce the overall synchronize_srcu() latency
> to well under a microsecond.  The price to be paid is that each instance
> of synchronize_sched_expedited() IPIs all the online CPUs, and awakens
> the migration thread on each.
> 
> Would this approach likely work for you?

Hum, this path can be triggered by a guest, so IPI'ing all online CPUs 
is not a happy thought.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update
  2009-09-24 17:28     ` Paul E. McKenney
  2009-09-24 18:05       ` Marcelo Tosatti
@ 2009-09-25 15:05       ` Avi Kivity
  1 sibling, 0 replies; 26+ messages in thread
From: Avi Kivity @ 2009-09-25 15:05 UTC (permalink / raw)
  To: paulmck; +Cc: Marcelo Tosatti, kvm

On 09/24/2009 08:28 PM, Paul E. McKenney wrote:
>
>> Each synchronize_srcu() call takes about 10ms (avg 3ms per
>> synchronize_sched call), so this is hurting us.
>>
>> Is this expected? Is there any possibility for synchronize_srcu()
>> optimization?
>>
>> There are other sides we can work on, such as reducing the memory slot
>> updates, but i'm wondering what can be done regarding SRCU itself.
>>      
> This is expected behavior, but there is a possible fix currently
> in mainline (Linus's git tree).  The idea would be to create a
> synchronize_srcu_expedited(), which starts with synchronize_srcu(), and
> replaces the synchronize_sched() calls with synchronize_sched_expedited().
>
> This could potentially reduce the overall synchronize_srcu() latency
> to well under a microsecond.  The price to be paid is that each instance
> of synchronize_sched_expedited() IPIs all the online CPUs, and awakens
> the migration thread on each.
>
> Would this approach likely work for you?
>    

It's perfect.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2009-09-25 15:57 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-21 23:37 [patch 00/10] RFC: switch vcpu context to use SRCU Marcelo Tosatti
2009-09-21 23:37 ` [patch 01/10] KVM: modify memslots layout in struct kvm Marcelo Tosatti
2009-09-21 23:37 ` [patch 02/10] KVM: modify alias layout in x86s struct kvm_arch Marcelo Tosatti
2009-09-21 23:37 ` [patch 03/10] KVM: switch dirty_log to mmu_lock protection Marcelo Tosatti
2009-09-22  6:37   ` Avi Kivity
2009-09-22 12:44     ` Marcelo Tosatti
2009-09-22 12:52       ` Avi Kivity
2009-09-21 23:37 ` [patch 04/10] KVM: split kvm_arch_set_memory_region into prepare and commit Marcelo Tosatti
2009-09-22  6:40   ` Avi Kivity
2009-09-21 23:37 ` [patch 05/10] KVM: introduce gfn_to_pfn_memslot Marcelo Tosatti
2009-09-21 23:37 ` [patch 06/10] KVM: use gfn_to_pfn_memslot in kvm_iommu_map_pages Marcelo Tosatti
2009-09-21 23:37 ` [patch 07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update Marcelo Tosatti
2009-09-22  6:59   ` Avi Kivity
2009-09-22 16:16     ` Marcelo Tosatti
2009-09-22 10:40   ` Fernando Carrijo
2009-09-22 12:55     ` Marcelo Tosatti
2009-09-24 14:06   ` Marcelo Tosatti
2009-09-24 17:28     ` Paul E. McKenney
2009-09-24 18:05       ` Marcelo Tosatti
2009-09-25 15:05       ` Avi Kivity
2009-09-21 23:37 ` [patch 08/10] KVM: x86: switch kvm_set_memory_alias " Marcelo Tosatti
2009-09-22  7:04   ` Avi Kivity
2009-09-21 23:37 ` [patch 09/10] KVM: convert io_bus to SRCU Marcelo Tosatti
2009-09-21 23:37 ` [patch 10/10] KVM: switch vcpu context to use SRCU Marcelo Tosatti
2009-09-22  7:07   ` Avi Kivity
2009-09-22  7:09 ` [patch 00/10] RFC: " Avi Kivity

This is a public inbox, see mirroring instructions
on how to clone and mirror all data and code used for this inbox