All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/4] Swapping
@ 2007-10-13  2:10 Izik Eidus
       [not found] ` <47102919.6070802-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 19+ messages in thread
From: Izik Eidus @ 2007-10-13  2:10 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

[-- Attachment #1: Type: text/plain, Size: 58 bytes --]

this patch makes the guest non-shadowed pages swappable


[-- Attachment #2: 0005-make-the-guest-non-shadowed-memory-swappable.patch --]
[-- Type: text/x-patch, Size: 9461 bytes --]

>From 8e25e215b8ed95ca4ff51cbfcf5bdc438bb799f4 Mon Sep 17 00:00:00 2001
From: Izik Eidus <izik@Home1.(none)>
Date: Sat, 13 Oct 2007 04:03:28 +0200
Subject: [PATCH] make the guest non shadowed memory swappable

Signed-off-by: Izik Eidus <izike-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
---
 drivers/kvm/kvm.h         |    1 +
 drivers/kvm/kvm_main.c    |   66 +++++++++++++++++++++++++-------------------
 drivers/kvm/mmu.c         |   13 ++++++++-
 drivers/kvm/paging_tmpl.h |   23 +++++++++++++--
 4 files changed, 70 insertions(+), 33 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index a155c2b..2e83fa7 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -409,6 +409,7 @@ struct kvm_memory_slot {
 	unsigned long *rmap;
 	unsigned long *dirty_bitmap;
 	int user_alloc; /* user allocated memory */
+	unsigned long userspace_addr;
 };
 
 struct kvm {
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index bfa201c..0dce93c 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -321,15 +321,6 @@ static struct kvm *kvm_create_vm(void)
 
 static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
 {
-	int i;
-
-	for (i = 0; i < free->npages; ++i) {
-		if (free->phys_mem[i]) {
-			if (!PageReserved(free->phys_mem[i]))
-				SetPageDirty(free->phys_mem[i]);
-			page_cache_release(free->phys_mem[i]);
-		}
-	}
 }
 
 static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
@@ -771,19 +762,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 		memset(new.phys_mem, 0, npages * sizeof(struct page *));
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 		if (user_alloc) {
-			unsigned long pages_num;
-
 			new.user_alloc = 1;
-			down_read(&current->mm->mmap_sem);
-
-			pages_num = get_user_pages(current, current->mm,
-						   mem->userspace_addr,
-						   npages, 1, 0, new.phys_mem,
-						   NULL);
-
-			up_read(&current->mm->mmap_sem);
-			if (pages_num != npages)
-				goto out_unlock;
+			new.userspace_addr = mem->userspace_addr;
 		} else {
 			for (i = 0; i < npages; ++i) {
 				new.phys_mem[i] = alloc_page(GFP_HIGHUSER
@@ -1058,8 +1038,27 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
 	gfn = unalias_gfn(kvm, gfn);
 	slot = __gfn_to_memslot(kvm, gfn);
-	if (!slot)
+	if (!slot) {
+		get_page(bad_page);
 		return bad_page;
+	}
+	if (slot->user_alloc) {
+		struct page *page[1];
+		int npages;
+
+		down_read(&current->mm->mmap_sem);
+		npages = get_user_pages(current, current->mm,
+					slot->userspace_addr
+					+ (gfn - slot->base_gfn) * PAGE_SIZE, 1,
+					1, 0, page, NULL);
+		up_read(&current->mm->mmap_sem);
+		if (npages != 1) {
+			get_page(bad_page);
+			return bad_page;
+		}
+		return page[0];
+	}
+	get_page(slot->phys_mem[gfn - slot->base_gfn]);
 	return slot->phys_mem[gfn - slot->base_gfn];
 }
 EXPORT_SYMBOL_GPL(gfn_to_page);
@@ -1079,13 +1078,16 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 	struct page *page;
 
 	page = gfn_to_page(kvm, gfn);
-	if (is_error_page(page))
+	if (is_error_page(page)) {
+		put_page(page);
 		return -EFAULT;
+	}
 	page_virt = kmap_atomic(page, KM_USER0);
 
 	memcpy(data, page_virt + offset, len);
 
 	kunmap_atomic(page_virt, KM_USER0);
+	put_page(page);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_read_guest_page);
@@ -1117,14 +1119,17 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
 	struct page *page;
 
 	page = gfn_to_page(kvm, gfn);
-	if (is_error_page(page))
+	if (is_error_page(page)) {
+		put_page(page);
 		return -EFAULT;
+	}
 	page_virt = kmap_atomic(page, KM_USER0);
 
 	memcpy(page_virt + offset, data, len);
 
 	kunmap_atomic(page_virt, KM_USER0);
 	mark_page_dirty(kvm, gfn);
+	put_page(page);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_page);
@@ -1155,13 +1160,16 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
 	struct page *page;
 
 	page = gfn_to_page(kvm, gfn);
-	if (is_error_page(page))
+	if (is_error_page(page)) {
+		put_page(page);
 		return -EFAULT;
+	}
 	page_virt = kmap_atomic(page, KM_USER0);
 
 	memset(page_virt + offset, 0, len);
 
 	kunmap_atomic(page_virt, KM_USER0);
+	put_page(page);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
@@ -2090,13 +2098,12 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	for (i = 0; i < nr_pages; ++i) {
 		mutex_lock(&vcpu->kvm->lock);
 		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
-		if (page)
-			get_page(page);
 		vcpu->pio.guest_pages[i] = page;
 		mutex_unlock(&vcpu->kvm->lock);
 		if (!page) {
 			inject_gp(vcpu);
 			free_pio_guest_pages(vcpu);
 			return 1;
 		}
 	}
@@ -3081,9 +3088,10 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
 
 	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 	page = gfn_to_page(kvm, pgoff);
-	if (is_error_page(page))
+	if (is_error_page(page)) {
+		put_page(page);
 		return NOPAGE_SIGBUS;
-	get_page(page);
+	}
 	if (type != NULL)
 		*type = VM_FAULT_MINOR;
 
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index e6a9b4a..19af09a 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -425,6 +425,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	if (!is_rmap_pte(*spte))
 		return;
 	page = page_header(__pa(spte));
+	put_page(pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT));
 	rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]);
 	if (!*rmapp) {
 		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -907,6 +908,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 			table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
 								PT_USER_MASK;
 			rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
+			if (!is_rmap_pte(table[index]))
+				put_page(pfn_to_page((v & PT64_BASE_ADDR_MASK)
+					 >> PAGE_SHIFT));
 			return 0;
 		}
 
@@ -921,6 +925,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 						     1, 3, &table[index]);
 			if (!new_table) {
 				pgprintk("nonpaging_map: ENOMEM\n");
+				put_page(pfn_to_page((v & PT64_BASE_ADDR_MASK)
+						     >> PAGE_SHIFT));
 				return -ENOMEM;
 			}
 
@@ -1035,8 +1041,11 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 
 	paddr = gpa_to_hpa(vcpu->kvm, addr & PT64_BASE_ADDR_MASK);
 
-	if (is_error_hpa(paddr))
+	if (is_error_hpa(paddr)) {
+		put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+				     >> PAGE_SHIFT));
 		return 1;
+	}
 
 	return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
 }
@@ -1515,6 +1524,8 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 				 && !is_error_hpa(hpa))
 				printk(KERN_ERR "audit: (%s) notrap shadow,"
 				       " valid guest gva %lx\n", audit_msg, va);
+			put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+					     >> PAGE_SHIFT));
 
 		}
 	}
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 58fd35a..931a308 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -175,6 +175,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 		walker->inherited_ar &= walker->table[index];
 		table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
 		kunmap_atomic(walker->table, KM_USER0);
+		put_page(walker->page);
 		walker->page = gfn_to_page(vcpu->kvm, table_gfn);
 		walker->table = kmap_atomic(walker->page, KM_USER0);
 		--walker->level;
@@ -183,8 +184,10 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 			 walker->level - 1, table_gfn);
 	}
 	walker->pte = *ptep;
-	if (walker->page)
+	if (walker->page) {
 		walker->ptep = NULL;
+		put_page(walker->page);
+	}
 	if (walker->table)
 		kunmap_atomic(walker->table, KM_USER0);
 	pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
@@ -206,6 +209,8 @@ err:
 		walker->error_code |= PFERR_FETCH_MASK;
 	if (walker->table)
 		kunmap_atomic(walker->table, KM_USER0);
+	if (walker->page)
+		put_page(walker->page);
 	return 0;
 }
 
@@ -249,6 +254,8 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
 	if (is_error_hpa(paddr)) {
 		set_shadow_pte(shadow_pte,
 			       shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
+		put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+				     >> PAGE_SHIFT));
 		return;
 	}
 
@@ -286,9 +293,16 @@ unshadowed:
 	pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
 	set_shadow_pte(shadow_pte, spte);
 	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
-	if (!was_rmapped)
+	if (!was_rmapped) {
 		rmap_add(vcpu, shadow_pte, (gaddr & PT64_BASE_ADDR_MASK)
 			 >> PAGE_SHIFT);
+		if (!is_rmap_pte(*shadow_pte))
+			put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+					     >> PAGE_SHIFT));
+	}
+	else
+		put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+				     >> PAGE_SHIFT));
 	if (!ptwrite || !*ptwrite)
 		vcpu->last_pte_updated = shadow_pte;
 }
@@ -512,19 +526,22 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 {
 	int i;
 	pt_element_t *gpt;
+	struct page *page;
 
 	if (sp->role.metaphysical || PTTYPE == 32) {
 		nonpaging_prefetch_page(vcpu, sp);
 		return;
 	}
 
-	gpt = kmap_atomic(gfn_to_page(vcpu->kvm, sp->gfn), KM_USER0);
+	page = gfn_to_page(vcpu->kvm, sp->gfn);
+	gpt = kmap_atomic(page, KM_USER0);
 	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
 		if (is_present_pte(gpt[i]))
 			sp->spt[i] = shadow_trap_nonpresent_pte;
 		else
 			sp->spt[i] = shadow_notrap_nonpresent_pte;
 	kunmap_atomic(gpt, KM_USER0);
+	put_page(page);
 }
 
 #undef pt_element_t
-- 
1.5.2.4


[-- Attachment #3: Type: text/plain, Size: 314 bytes --]

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

[-- Attachment #4: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found] ` <47102919.6070802-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-10-13 20:04   ` Anthony Liguori
       [not found]     ` <471124D4.3090901-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
  0 siblings, 1 reply; 19+ messages in thread
From: Anthony Liguori @ 2007-10-13 20:04 UTC (permalink / raw)
  To: Izik Eidus; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Izik Eidus wrote:
> this patch make the guest non shadowed pages swappedable
>
> ------------------------------------------------------------------------
>
> -------------------------------------------------------------------------
> This SF.net email is sponsored by: Splunk Inc.
> Still grepping through log files to find problems?  Stop.
> Now Search log events and configuration files using AJAX and a browser.
> Download your FREE copy of Splunk now >> http://get.splunk.com/
> ------------------------------------------------------------------------
>
> _______________________________________________
> kvm-devel mailing list
> kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
> https://lists.sourceforge.net/lists/listinfo/kvm-devel
> From 8e25e215b8ed95ca4ff51cbfcf5bdc438bb799f4 Mon Sep 17 00:00:00 2001
> From: Izik Eidus <izik@Home1.(none)>
> Date: Sat, 13 Oct 2007 04:03:28 +0200
> Subject: [PATCH] make the guest non shadowed memory swappable
>
> Signed-off-by: Izik Eidus <izike-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
> ---
>  drivers/kvm/kvm.h         |    1 +
>  drivers/kvm/kvm_main.c    |   66 +++++++++++++++++++++++++-------------------
>  drivers/kvm/mmu.c         |   13 ++++++++-
>  drivers/kvm/paging_tmpl.h |   23 +++++++++++++--
>  4 files changed, 70 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
> index a155c2b..2e83fa7 100644
> --- a/drivers/kvm/kvm.h
> +++ b/drivers/kvm/kvm.h
> @@ -409,6 +409,7 @@ struct kvm_memory_slot {
>  	unsigned long *rmap;
>  	unsigned long *dirty_bitmap;
>  	int user_alloc; /* user allocated memory */
> +	unsigned long userspace_addr;
>  };
>  
>  struct kvm {
> diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
> index bfa201c..0dce93c 100644
> --- a/drivers/kvm/kvm_main.c
> +++ b/drivers/kvm/kvm_main.c
> @@ -321,15 +321,6 @@ static struct kvm *kvm_create_vm(void)
>  
>  static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
>  {
> -	int i;
> -
> -	for (i = 0; i < free->npages; ++i) {
> -		if (free->phys_mem[i]) {
> -			if (!PageReserved(free->phys_mem[i]))
> -				SetPageDirty(free->phys_mem[i]);
> -			page_cache_release(free->phys_mem[i]);
> -		}
> -	}
>  }
>
>   

If you're making this a nop, then you might as well just delete the 
function and its callers.

>  static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
> @@ -771,19 +762,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
>  		memset(new.phys_mem, 0, npages * sizeof(struct page *));
>  		memset(new.rmap, 0, npages * sizeof(*new.rmap));
>  		if (user_alloc) {
> -			unsigned long pages_num;
> -
>  			new.user_alloc = 1;
> -			down_read(&current->mm->mmap_sem);
> -
> -			pages_num = get_user_pages(current, current->mm,
> -						   mem->userspace_addr,
> -						   npages, 1, 0, new.phys_mem,
> -						   NULL);
> -
> -			up_read(&current->mm->mmap_sem);
> -			if (pages_num != npages)
> -				goto out_unlock;
> +			new.userspace_addr = mem->userspace_addr;
>  		} else {
>   

You can eliminate the braces in this condition.

>  			for (i = 0; i < npages; ++i) {
>  				new.phys_mem[i] = alloc_page(GFP_HIGHUSER
> @@ -1058,8 +1038,27 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
>  
>  	gfn = unalias_gfn(kvm, gfn);
>  	slot = __gfn_to_memslot(kvm, gfn);
> -	if (!slot)
> +	if (!slot) {
> +		get_page(bad_page);
>  		return bad_page;
> +	}
> +	if (slot->user_alloc) {
> +		struct page *page[1];
> +		int npages;
> +
> +		down_read(&current->mm->mmap_sem);
> +		npages = get_user_pages(current, current->mm,
> +					slot->userspace_addr
> +					+ (gfn - slot->base_gfn) * PAGE_SIZE, 1,
> +					1, 0, page, NULL);
> +		up_read(&current->mm->mmap_sem);
> +		if (npages != 1) {
> +			get_page(bad_page);
> +			return bad_page;
> +		}
> +		return page[0];
>   

Wouldn't it be necessary to assign page[0] to slot->phys_mem[gfn - 
slot->base_gfn]?

Regards,

Anthony Liguori

> +	}
> +	get_page(slot->phys_mem[gfn - slot->base_gfn]);
>  	return slot->phys_mem[gfn - slot->base_gfn];
>  }
>  EXPORT_SYMBOL_GPL(gfn_to_page);
> @@ -1079,13 +1078,16 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
>  	struct page *page;
>  
>  	page = gfn_to_page(kvm, gfn);
> -	if (is_error_page(page))
> +	if (is_error_page(page)) {
> +		put_page(page);
>  		return -EFAULT;
> +	}
>  	page_virt = kmap_atomic(page, KM_USER0);
>  
>  	memcpy(data, page_virt + offset, len);
>  
>  	kunmap_atomic(page_virt, KM_USER0);
> +	put_page(page);
>  	return 0;
>  }
>  EXPORT_SYMBOL_GPL(kvm_read_guest_page);
> @@ -1117,14 +1119,17 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
>  	struct page *page;
>  
>  	page = gfn_to_page(kvm, gfn);
> -	if (is_error_page(page))
> +	if (is_error_page(page)) {
> +		put_page(page);
>  		return -EFAULT;
> +	}
>  	page_virt = kmap_atomic(page, KM_USER0);
>  
>  	memcpy(page_virt + offset, data, len);
>  
>  	kunmap_atomic(page_virt, KM_USER0);
>  	mark_page_dirty(kvm, gfn);
> +	put_page(page);
>  	return 0;
>  }
>  EXPORT_SYMBOL_GPL(kvm_write_guest_page);
> @@ -1155,13 +1160,16 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
>  	struct page *page;
>  
>  	page = gfn_to_page(kvm, gfn);
> -	if (is_error_page(page))
> +	if (is_error_page(page)) {
> +		put_page(page);
>  		return -EFAULT;
> +	}
>  	page_virt = kmap_atomic(page, KM_USER0);
>  
>  	memset(page_virt + offset, 0, len);
>  
>  	kunmap_atomic(page_virt, KM_USER0);
> +	put_page(page);
>  	return 0;
>  }
>  EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
> @@ -2090,13 +2098,12 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
>  	for (i = 0; i < nr_pages; ++i) {
>  		mutex_lock(&vcpu->kvm->lock);
>  		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
> -		if (page)
> -			get_page(page);
>  		vcpu->pio.guest_pages[i] = page;
>  		mutex_unlock(&vcpu->kvm->lock);
>  		if (!page) {
>  			inject_gp(vcpu);
>  			free_pio_guest_pages(vcpu);
>  			return 1;
>  		}
>  	}
> @@ -3081,9 +3088,10 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
>  
>  	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
>  	page = gfn_to_page(kvm, pgoff);
> -	if (is_error_page(page))
> +	if (is_error_page(page)) {
> +		put_page(page);
>  		return NOPAGE_SIGBUS;
> -	get_page(page);
> +	}
>  	if (type != NULL)
>  		*type = VM_FAULT_MINOR;
>  
> diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
> index e6a9b4a..19af09a 100644
> --- a/drivers/kvm/mmu.c
> +++ b/drivers/kvm/mmu.c
> @@ -425,6 +425,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
>  	if (!is_rmap_pte(*spte))
>  		return;
>  	page = page_header(__pa(spte));
> +	put_page(pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT));
>  	rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]);
>  	if (!*rmapp) {
>  		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
> @@ -907,6 +908,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
>  			table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
>  								PT_USER_MASK;
>  			rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
> +			if (!is_rmap_pte(table[index]))
> +				put_page(pfn_to_page((v & PT64_BASE_ADDR_MASK)
> +					 >> PAGE_SHIFT));
>  			return 0;
>  		}
>  
> @@ -921,6 +925,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
>  						     1, 3, &table[index]);
>  			if (!new_table) {
>  				pgprintk("nonpaging_map: ENOMEM\n");
> +				put_page(pfn_to_page((v & PT64_BASE_ADDR_MASK)
> +						     >> PAGE_SHIFT));
>  				return -ENOMEM;
>  			}
>  
> @@ -1035,8 +1041,11 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
>  
>  	paddr = gpa_to_hpa(vcpu->kvm, addr & PT64_BASE_ADDR_MASK);
>  
> -	if (is_error_hpa(paddr))
> +	if (is_error_hpa(paddr)) {
> +		put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
> +				     >> PAGE_SHIFT));
>  		return 1;
> +	}
>  
>  	return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
>  }
> @@ -1515,6 +1524,8 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
>  				 && !is_error_hpa(hpa))
>  				printk(KERN_ERR "audit: (%s) notrap shadow,"
>  				       " valid guest gva %lx\n", audit_msg, va);
> +			put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
> +					     >> PAGE_SHIFT));
>  
>  		}
>  	}
> diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
> index 58fd35a..931a308 100644
> --- a/drivers/kvm/paging_tmpl.h
> +++ b/drivers/kvm/paging_tmpl.h
> @@ -175,6 +175,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
>  		walker->inherited_ar &= walker->table[index];
>  		table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
>  		kunmap_atomic(walker->table, KM_USER0);
> +		put_page(walker->page);
>  		walker->page = gfn_to_page(vcpu->kvm, table_gfn);
>  		walker->table = kmap_atomic(walker->page, KM_USER0);
>  		--walker->level;
> @@ -183,8 +184,10 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
>  			 walker->level - 1, table_gfn);
>  	}
>  	walker->pte = *ptep;
> -	if (walker->page)
> +	if (walker->page) {
>  		walker->ptep = NULL;
> +		put_page(walker->page);
> +	}
>  	if (walker->table)
>  		kunmap_atomic(walker->table, KM_USER0);
>  	pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
> @@ -206,6 +209,8 @@ err:
>  		walker->error_code |= PFERR_FETCH_MASK;
>  	if (walker->table)
>  		kunmap_atomic(walker->table, KM_USER0);
> +	if (walker->page)
> +		put_page(walker->page);
>  	return 0;
>  }
>  
> @@ -249,6 +254,8 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
>  	if (is_error_hpa(paddr)) {
>  		set_shadow_pte(shadow_pte,
>  			       shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
> +		put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
> +				     >> PAGE_SHIFT));
>  		return;
>  	}
>  
> @@ -286,9 +293,16 @@ unshadowed:
>  	pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
>  	set_shadow_pte(shadow_pte, spte);
>  	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
> -	if (!was_rmapped)
> +	if (!was_rmapped) {
>  		rmap_add(vcpu, shadow_pte, (gaddr & PT64_BASE_ADDR_MASK)
>  			 >> PAGE_SHIFT);
> +		if (!is_rmap_pte(*shadow_pte))
> +			put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
> +					     >> PAGE_SHIFT));
> +	}
> +	else
> +		put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
> +				     >> PAGE_SHIFT));
>  	if (!ptwrite || !*ptwrite)
>  		vcpu->last_pte_updated = shadow_pte;
>  }
> @@ -512,19 +526,22 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
>  {
>  	int i;
>  	pt_element_t *gpt;
> +	struct page *page;
>  
>  	if (sp->role.metaphysical || PTTYPE == 32) {
>  		nonpaging_prefetch_page(vcpu, sp);
>  		return;
>  	}
>  
> -	gpt = kmap_atomic(gfn_to_page(vcpu->kvm, sp->gfn), KM_USER0);
> +	page = gfn_to_page(vcpu->kvm, sp->gfn);
> +	gpt = kmap_atomic(page, KM_USER0);
>  	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
>  		if (is_present_pte(gpt[i]))
>  			sp->spt[i] = shadow_trap_nonpresent_pte;
>  		else
>  			sp->spt[i] = shadow_notrap_nonpresent_pte;
>  	kunmap_atomic(gpt, KM_USER0);
> +	put_page(page);
>  }
>  
>  #undef pt_element_t
> -- 1.5.2.4


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]     ` <471124D4.3090901-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
@ 2007-10-13 20:13       ` Izik Eidus
       [not found]         ` <471126D9.4030204-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 19+ messages in thread
From: Izik Eidus @ 2007-10-13 20:13 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Anthony Liguori wrote:
> Izik Eidus wrote:
>> this patch make the guest non shadowed pages swappedable
>>
>> ------------------------------------------------------------------------
>>
>> ------------------------------------------------------------------------- 
>>
>> This SF.net email is sponsored by: Splunk Inc.
>> Still grepping through log files to find problems?  Stop.
>> Now Search log events and configuration files using AJAX and a browser.
>> Download your FREE copy of Splunk now >> http://get.splunk.com/
>> ------------------------------------------------------------------------
>>
>> _______________________________________________
>> kvm-devel mailing list
>> kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
>> https://lists.sourceforge.net/lists/listinfo/kvm-devel
>> From 8e25e215b8ed95ca4ff51cbfcf5bdc438bb799f4 Mon Sep 17 00:00:00 2001
>> From: Izik Eidus <izik@Home1.(none)>
>> Date: Sat, 13 Oct 2007 04:03:28 +0200
>> Subject: [PATCH] make the guest non shadowed memory swappable
>>
>> Signed-off-by: Izik Eidus <izike-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
>> ---
>>  drivers/kvm/kvm.h         |    1 +
>>  drivers/kvm/kvm_main.c    |   66 
>> +++++++++++++++++++++++++-------------------
>>  drivers/kvm/mmu.c         |   13 ++++++++-
>>  drivers/kvm/paging_tmpl.h |   23 +++++++++++++--
>>  4 files changed, 70 insertions(+), 33 deletions(-)
>>
>> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
>> index a155c2b..2e83fa7 100644
>> --- a/drivers/kvm/kvm.h
>> +++ b/drivers/kvm/kvm.h
>> @@ -409,6 +409,7 @@ struct kvm_memory_slot {
>>      unsigned long *rmap;
>>      unsigned long *dirty_bitmap;
>>      int user_alloc; /* user allocated memory */
>> +    unsigned long userspace_addr;
>>  };
>>  
>>  struct kvm {
>> diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
>> index bfa201c..0dce93c 100644
>> --- a/drivers/kvm/kvm_main.c
>> +++ b/drivers/kvm/kvm_main.c
>> @@ -321,15 +321,6 @@ static struct kvm *kvm_create_vm(void)
>>  
>>  static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
>>  {
>> -    int i;
>> -
>> -    for (i = 0; i < free->npages; ++i) {
>> -        if (free->phys_mem[i]) {
>> -            if (!PageReserved(free->phys_mem[i]))
>> -                SetPageDirty(free->phys_mem[i]);
>> -            page_cache_release(free->phys_mem[i]);
>> -        }
>> -    }
>>  }
>>
>>   
>
> If you're making this a nop, then you might as well just delete the 
> function and it's callers.
Agreed, I will delete it.
>
>>  static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
>> @@ -771,19 +762,8 @@ static int kvm_vm_ioctl_set_memory_region(struct 
>> kvm *kvm,
>>          memset(new.phys_mem, 0, npages * sizeof(struct page *));
>>          memset(new.rmap, 0, npages * sizeof(*new.rmap));
>>          if (user_alloc) {
>> -            unsigned long pages_num;
>> -
>>              new.user_alloc = 1;
>> -            down_read(&current->mm->mmap_sem);
>> -
>> -            pages_num = get_user_pages(current, current->mm,
>> -                           mem->userspace_addr,
>> -                           npages, 1, 0, new.phys_mem,
>> -                           NULL);
>> -
>> -            up_read(&current->mm->mmap_sem);
>> -            if (pages_num != npages)
>> -                goto out_unlock;
>> +            new.userspace_addr = mem->userspace_addr;
>>          } else {
>>   
>
> You can eliminate the braces in this condition.
yup, will change it.
>
>>              for (i = 0; i < npages; ++i) {
>>                  new.phys_mem[i] = alloc_page(GFP_HIGHUSER
>> @@ -1058,8 +1038,27 @@ struct page *gfn_to_page(struct kvm *kvm, 
>> gfn_t gfn)
>>  
>>      gfn = unalias_gfn(kvm, gfn);
>>      slot = __gfn_to_memslot(kvm, gfn);
>> -    if (!slot)
>> +    if (!slot) {
>> +        get_page(bad_page);
>>          return bad_page;
>> +    }
>> +    if (slot->user_alloc) {
>> +        struct page *page[1];
>> +        int npages;
>> +
>> +        down_read(&current->mm->mmap_sem);
>> +        npages = get_user_pages(current, current->mm,
>> +                    slot->userspace_addr
>> +                    + (gfn - slot->base_gfn) * PAGE_SIZE, 1,
>> +                    1, 0, page, NULL);
>> +        up_read(&current->mm->mmap_sem);
>> +        if (npages != 1) {
>> +            get_page(bad_page);
>> +            return bad_page;
>> +        }
>> +        return page[0];
>>   
>
> Wouldn't it be necessary to assign page[0] to slot->phys_mem[gfn - 
> slot->base_gfn]?
No — that is bad_page. If, by the time get_user_pages() is called, we are
unable to get the page, it means the host has a serious problem (for
example, it could not bring the page back in from swap).
And if you are talking about the bad_page returned before get_user_pages()
(at the `if (slot->user_alloc) {` check), then the guest has a serious bug,
because it is trying to access a page it does not have;
in that case, all we care about is protecting the host.
>
> Regards,
>
> Anthony Liguori
>
>> +    }
>> +    get_page(slot->phys_mem[gfn - slot->base_gfn]);
>>      return slot->phys_mem[gfn - slot->base_gfn];
>>  }
>>  EXPORT_SYMBOL_GPL(gfn_to_page);
>> @@ -1079,13 +1078,16 @@ int kvm_read_guest_page(struct kvm *kvm, 
>> gfn_t gfn, void *data, int offset,
>>      struct page *page;
>>  
>>      page = gfn_to_page(kvm, gfn);
>> -    if (is_error_page(page))
>> +    if (is_error_page(page)) {
>> +        put_page(page);
>>          return -EFAULT;
>> +    }
>>      page_virt = kmap_atomic(page, KM_USER0);
>>  
>>      memcpy(data, page_virt + offset, len);
>>  
>>      kunmap_atomic(page_virt, KM_USER0);
>> +    put_page(page);
>>      return 0;
>>  }
>>  EXPORT_SYMBOL_GPL(kvm_read_guest_page);
>> @@ -1117,14 +1119,17 @@ int kvm_write_guest_page(struct kvm *kvm, 
>> gfn_t gfn, const void *data,
>>      struct page *page;
>>  
>>      page = gfn_to_page(kvm, gfn);
>> -    if (is_error_page(page))
>> +    if (is_error_page(page)) {
>> +        put_page(page);
>>          return -EFAULT;
>> +    }
>>      page_virt = kmap_atomic(page, KM_USER0);
>>  
>>      memcpy(page_virt + offset, data, len);
>>  
>>      kunmap_atomic(page_virt, KM_USER0);
>>      mark_page_dirty(kvm, gfn);
>> +    put_page(page);
>>      return 0;
>>  }
>>  EXPORT_SYMBOL_GPL(kvm_write_guest_page);
>> @@ -1155,13 +1160,16 @@ int kvm_clear_guest_page(struct kvm *kvm, 
>> gfn_t gfn, int offset, int len)
>>      struct page *page;
>>  
>>      page = gfn_to_page(kvm, gfn);
>> -    if (is_error_page(page))
>> +    if (is_error_page(page)) {
>> +        put_page(page);
>>          return -EFAULT;
>> +    }
>>      page_virt = kmap_atomic(page, KM_USER0);
>>  
>>      memset(page_virt + offset, 0, len);
>>  
>>      kunmap_atomic(page_virt, KM_USER0);
>> +    put_page(page);
>>      return 0;
>>  }
>>  EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
>> @@ -2090,13 +2098,12 @@ int kvm_emulate_pio_string(struct kvm_vcpu 
>> *vcpu, struct kvm_run *run, int in,
>>      for (i = 0; i < nr_pages; ++i) {
>>          mutex_lock(&vcpu->kvm->lock);
>>          page = gva_to_page(vcpu, address + i * PAGE_SIZE);
>> -        if (page)
>> -            get_page(page);
>>          vcpu->pio.guest_pages[i] = page;
>>          mutex_unlock(&vcpu->kvm->lock);
>>          if (!page) {
>>              inject_gp(vcpu);
>>              free_pio_guest_pages(vcpu);
>>              return 1;
>>          }
>>      }
>> @@ -3081,9 +3088,10 @@ static struct page *kvm_vm_nopage(struct 
>> vm_area_struct *vma,
>>  
>>      pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
>>      page = gfn_to_page(kvm, pgoff);
>> -    if (is_error_page(page))
>> +    if (is_error_page(page)) {
>> +        put_page(page);
>>          return NOPAGE_SIGBUS;
>> -    get_page(page);
>> +    }
>>      if (type != NULL)
>>          *type = VM_FAULT_MINOR;
>>  
>> diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
>> index e6a9b4a..19af09a 100644
>> --- a/drivers/kvm/mmu.c
>> +++ b/drivers/kvm/mmu.c
>> @@ -425,6 +425,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
>>      if (!is_rmap_pte(*spte))
>>          return;
>>      page = page_header(__pa(spte));
>> +    put_page(pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT));
>>      rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]);
>>      if (!*rmapp) {
>>          printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
>> @@ -907,6 +908,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, 
>> gva_t v, hpa_t p)
>>              table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
>>                                  PT_USER_MASK;
>>              rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
>> +            if (!is_rmap_pte(table[index]))
>> +                put_page(pfn_to_page((v & PT64_BASE_ADDR_MASK)
>> +                     >> PAGE_SHIFT));
>>              return 0;
>>          }
>>  
>> @@ -921,6 +925,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, 
>> gva_t v, hpa_t p)
>>                               1, 3, &table[index]);
>>              if (!new_table) {
>>                  pgprintk("nonpaging_map: ENOMEM\n");
>> +                put_page(pfn_to_page((v & PT64_BASE_ADDR_MASK)
>> +                             >> PAGE_SHIFT));
>>                  return -ENOMEM;
>>              }
>>  
>> @@ -1035,8 +1041,11 @@ static int nonpaging_page_fault(struct 
>> kvm_vcpu *vcpu, gva_t gva,
>>  
>>      paddr = gpa_to_hpa(vcpu->kvm, addr & PT64_BASE_ADDR_MASK);
>>  
>> -    if (is_error_hpa(paddr))
>> +    if (is_error_hpa(paddr)) {
>> +        put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
>> +                     >> PAGE_SHIFT));
>>          return 1;
>> +    }
>>  
>>      return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
>>  }
>> @@ -1515,6 +1524,8 @@ static void audit_mappings_page(struct kvm_vcpu 
>> *vcpu, u64 page_pte,
>>                   && !is_error_hpa(hpa))
>>                  printk(KERN_ERR "audit: (%s) notrap shadow,"
>>                         " valid guest gva %lx\n", audit_msg, va);
>> +            put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
>> +                         >> PAGE_SHIFT));
>>  
>>          }
>>      }
>> diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
>> index 58fd35a..931a308 100644
>> --- a/drivers/kvm/paging_tmpl.h
>> +++ b/drivers/kvm/paging_tmpl.h
>> @@ -175,6 +175,7 @@ static int FNAME(walk_addr)(struct guest_walker 
>> *walker,
>>          walker->inherited_ar &= walker->table[index];
>>          table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
>>          kunmap_atomic(walker->table, KM_USER0);
>> +        put_page(walker->page);
>>          walker->page = gfn_to_page(vcpu->kvm, table_gfn);
>>          walker->table = kmap_atomic(walker->page, KM_USER0);
>>          --walker->level;
>> @@ -183,8 +184,10 @@ static int FNAME(walk_addr)(struct guest_walker 
>> *walker,
>>               walker->level - 1, table_gfn);
>>      }
>>      walker->pte = *ptep;
>> -    if (walker->page)
>> +    if (walker->page) {
>>          walker->ptep = NULL;
>> +        put_page(walker->page);
>> +    }
>>      if (walker->table)
>>          kunmap_atomic(walker->table, KM_USER0);
>>      pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
>> @@ -206,6 +209,8 @@ err:
>>          walker->error_code |= PFERR_FETCH_MASK;
>>      if (walker->table)
>>          kunmap_atomic(walker->table, KM_USER0);
>> +    if (walker->page)
>> +        put_page(walker->page);
>>      return 0;
>>  }
>>  
>> @@ -249,6 +254,8 @@ static void FNAME(set_pte_common)(struct kvm_vcpu 
>> *vcpu,
>>      if (is_error_hpa(paddr)) {
>>          set_shadow_pte(shadow_pte,
>>                     shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
>> +        put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
>> +                     >> PAGE_SHIFT));
>>          return;
>>      }
>>  
>> @@ -286,9 +293,16 @@ unshadowed:
>>      pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
>>      set_shadow_pte(shadow_pte, spte);
>>      page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
>> -    if (!was_rmapped)
>> +    if (!was_rmapped) {
>>          rmap_add(vcpu, shadow_pte, (gaddr & PT64_BASE_ADDR_MASK)
>>               >> PAGE_SHIFT);
>> +        if (!is_rmap_pte(*shadow_pte))
>> +            put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
>> +                         >> PAGE_SHIFT));
>> +    }
>> +    else
>> +        put_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
>> +                     >> PAGE_SHIFT));
>>      if (!ptwrite || !*ptwrite)
>>          vcpu->last_pte_updated = shadow_pte;
>>  }
>> @@ -512,19 +526,22 @@ static void FNAME(prefetch_page)(struct 
>> kvm_vcpu *vcpu,
>>  {
>>      int i;
>>      pt_element_t *gpt;
>> +    struct page *page;
>>  
>>      if (sp->role.metaphysical || PTTYPE == 32) {
>>          nonpaging_prefetch_page(vcpu, sp);
>>          return;
>>      }
>>  
>> -    gpt = kmap_atomic(gfn_to_page(vcpu->kvm, sp->gfn), KM_USER0);
>> +    page = gfn_to_page(vcpu->kvm, sp->gfn);
>> +    gpt = kmap_atomic(page, KM_USER0);
>>      for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
>>          if (is_present_pte(gpt[i]))
>>              sp->spt[i] = shadow_trap_nonpresent_pte;
>>          else
>>              sp->spt[i] = shadow_notrap_nonpresent_pte;
>>      kunmap_atomic(gpt, KM_USER0);
>> +    put_page(page);
>>  }
>>  
>>  #undef pt_element_t
>> -- 1.5.2.4
>


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]         ` <471126D9.4030204-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-10-13 20:41           ` Izik Eidus
       [not found]             ` <47112D66.4020500-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 19+ messages in thread
From: Izik Eidus @ 2007-10-13 20:41 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Izik Eidus wrote:
> Anthony Liguori wrote:
>> Izik Eidus wrote:
>>>
>>> @@ -1058,8 +1038,27 @@ struct page *gfn_to_page(struct kvm *kvm, 
>>> gfn_t gfn)
>>>  
>>>      gfn = unalias_gfn(kvm, gfn);
>>>      slot = __gfn_to_memslot(kvm, gfn);
>>> -    if (!slot)
>>> +    if (!slot) {
>>> +        get_page(bad_page);
>>>          return bad_page;
>>> +    }
>>> +    if (slot->user_alloc) {
>>> +        struct page *page[1];
>>> +        int npages;
>>> +
>>> +        down_read(&current->mm->mmap_sem);
>>> +        npages = get_user_pages(current, current->mm,
>>> +                    slot->userspace_addr
>>> +                    + (gfn - slot->base_gfn) * PAGE_SIZE, 1,
>>> +                    1, 0, page, NULL);
>>> +        up_read(&current->mm->mmap_sem);
>>> +        if (npages != 1) {
>>> +            get_page(bad_page);
>>> +            return bad_page;
>>> +        }
>>> +        return page[0];
>>>   
>>
>> Wouldn't it be necessary to assign page[0] to slot->phys_mem[gfn - 
>> slot->base_gfn]?
>
Sorry, it seems like I misunderstood you in the answer I gave you.
It wouldn't be necessary to assign page[0] to slot->phys_mem[gfn - 
slot->base_gfn], because phys_mem won't have any memory allocated by that 
time.

With this patch, we no longer hold (when using userspace 
allocation) an array of all the memory in phys_mem.
Because the pages are now swappable, the physical address pointed to 
by the virtual address changes all the time (for example, when swapping happens),
so nothing guarantees that slot->phys_mem[gfn - slot->base_gfn] will 
really point to the page holding the gfn page.

So what we did is throw away the phys_mem array (which is also nice because it 
wastes less RAM), and at runtime we get the pages by using the 
virtual address.
Because the page's reference count is increased, we are guaranteed that 
until we release it, it points to the gfn (we release it by doing put_page).

Hope I was clearer this time :)


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]             ` <47112D66.4020500-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-10-13 23:17               ` Anthony Liguori
       [not found]                 ` <47115207.3090909-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
  0 siblings, 1 reply; 19+ messages in thread
From: Anthony Liguori @ 2007-10-13 23:17 UTC (permalink / raw)
  To: Izik Eidus; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Izik Eidus wrote:
> Izik Eidus wrote:
>> Anthony Liguori wrote:
>>> Izik Eidus wrote:
>>>>
>>>> @@ -1058,8 +1038,27 @@ struct page *gfn_to_page(struct kvm *kvm, 
>>>> gfn_t gfn)
>>>>  
>>>>      gfn = unalias_gfn(kvm, gfn);
>>>>      slot = __gfn_to_memslot(kvm, gfn);
>>>> -    if (!slot)
>>>> +    if (!slot) {
>>>> +        get_page(bad_page);
>>>>          return bad_page;
>>>> +    }
>>>> +    if (slot->user_alloc) {
>>>> +        struct page *page[1];
>>>> +        int npages;
>>>> +
>>>> +        down_read(&current->mm->mmap_sem);
>>>> +        npages = get_user_pages(current, current->mm,
>>>> +                    slot->userspace_addr
>>>> +                    + (gfn - slot->base_gfn) * PAGE_SIZE, 1,
>>>> +                    1, 0, page, NULL);
>>>> +        up_read(&current->mm->mmap_sem);
>>>> +        if (npages != 1) {
>>>> +            get_page(bad_page);
>>>> +            return bad_page;
>>>> +        }
>>>> +        return page[0];
>>>>   
>>>
>>> Wouldn't it be necessary to assign page[0] to slot->phys_mem[gfn - 
>>> slot->base_gfn]?
>>
> sorry, it seems like i missunderstand you in the answer i gave you.
> it wouldnt be necessary to assign page[0] to slot->phys_mem[gfn - 
> slot->base_gfn], beacuse phys_mem wont have any memory allocate by 
> this time.
>
> with this patch, we are not holding anymore (when using userspace 
> allocation) array of all the memory at phys_mem.
> beacuse now that the pages are swappable, the physical address pointed 
> by the virtual address all the time change (for example when swapping 
> happn)
> so no one promise us that slot->phys_mem[gfn - slot->base_gfn] will 
> really point to page holding the gfn page.
>
> so what we did, is throw away the phys_mem array (also nice beacuse it 
> waste less ram), and at runtime we are getting the pages by using the 
> virtual address
> beacuse the reference of the page get increased, it promised us that 
> untill we release it point to the gfn (release it by doing put_page)
>
> hope i was more clear this time :)

Yes, that makes sense!

I wonder if there's a more elegant way of dealing with older userspaces.  
For instance, is there any reason why we can't allocate a userspace memory 
region on behalf of userspace?  That way swap would even work with older 
userspaces.

Regards,

Anthony Liguori



-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                 ` <47115207.3090909-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
@ 2007-10-13 23:26                   ` Izik Eidus
       [not found]                     ` <4711542F.2060306-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 19+ messages in thread
From: Izik Eidus @ 2007-10-13 23:26 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Anthony Liguori wrote:
> Izik Eidus wrote:
>> Izik Eidus wrote:
>>> Anthony Liguori wrote:
>>>> Izik Eidus wrote:
>>>>>
>>>>> @@ -1058,8 +1038,27 @@ struct page *gfn_to_page(struct kvm *kvm, 
>>>>> gfn_t gfn)
>>>>>  
>>>>>      gfn = unalias_gfn(kvm, gfn);
>>>>>      slot = __gfn_to_memslot(kvm, gfn);
>>>>> -    if (!slot)
>>>>> +    if (!slot) {
>>>>> +        get_page(bad_page);
>>>>>          return bad_page;
>>>>> +    }
>>>>> +    if (slot->user_alloc) {
>>>>> +        struct page *page[1];
>>>>> +        int npages;
>>>>> +
>>>>> +        down_read(&current->mm->mmap_sem);
>>>>> +        npages = get_user_pages(current, current->mm,
>>>>> +                    slot->userspace_addr
>>>>> +                    + (gfn - slot->base_gfn) * PAGE_SIZE, 1,
>>>>> +                    1, 0, page, NULL);
>>>>> +        up_read(&current->mm->mmap_sem);
>>>>> +        if (npages != 1) {
>>>>> +            get_page(bad_page);
>>>>> +            return bad_page;
>>>>> +        }
>>>>> +        return page[0];
>>>>>   
>>>>
>>>> Wouldn't it be necessary to assign page[0] to slot->phys_mem[gfn - 
>>>> slot->base_gfn]?
>>>
>> sorry, it seems like i missunderstand you in the answer i gave you.
>> it wouldnt be necessary to assign page[0] to slot->phys_mem[gfn - 
>> slot->base_gfn], beacuse phys_mem wont have any memory allocate by 
>> this time.
>>
>> with this patch, we are not holding anymore (when using userspace 
>> allocation) array of all the memory at phys_mem.
>> beacuse now that the pages are swappable, the physical address 
>> pointed by the virtual address all the time change (for example when 
>> swapping happn)
>> so no one promise us that slot->phys_mem[gfn - slot->base_gfn] will 
>> really point to page holding the gfn page.
>>
>> so what we did, is throw away the phys_mem array (also nice beacuse 
>> it waste less ram), and at runtime we are getting the pages by using 
>> the virtual address
>> beacuse the reference of the page get increased, it promised us that 
>> untill we release it point to the gfn (release it by doing put_page)
>>
>> hope i was more clear this time :)
>
> Yes, that makes sense!
>
> I wonder if there's a more elegant way dealing with older userspaces.  
> For instance, is there any reason why we can allocate a userspace 
> memory region on behalf of userspace.  That way swap would even work 
> with older userspaces.
If we can do that, then yes, swap will work on older userspaces.
> Regards,
>
> Anthony Liguori
>
>


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                     ` <4711542F.2060306-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-10-13 23:35                       ` Anthony Liguori
       [not found]                         ` <4711563D.8020000-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
  0 siblings, 1 reply; 19+ messages in thread
From: Anthony Liguori @ 2007-10-13 23:35 UTC (permalink / raw)
  To: Izik Eidus; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Izik Eidus wrote:
> Anthony Liguori wrote:
>> Izik Eidus wrote:
>>> Izik Eidus wrote:
>>>> Anthony Liguori wrote:
>>>>> Izik Eidus wrote:
>>>>>>
>>>>>> @@ -1058,8 +1038,27 @@ struct page *gfn_to_page(struct kvm *kvm, 
>>>>>> gfn_t gfn)
>>>>>>  
>>>>>>      gfn = unalias_gfn(kvm, gfn);
>>>>>>      slot = __gfn_to_memslot(kvm, gfn);
>>>>>> -    if (!slot)
>>>>>> +    if (!slot) {
>>>>>> +        get_page(bad_page);
>>>>>>          return bad_page;
>>>>>> +    }
>>>>>> +    if (slot->user_alloc) {
>>>>>> +        struct page *page[1];
>>>>>> +        int npages;
>>>>>> +
>>>>>> +        down_read(&current->mm->mmap_sem);
>>>>>> +        npages = get_user_pages(current, current->mm,
>>>>>> +                    slot->userspace_addr
>>>>>> +                    + (gfn - slot->base_gfn) * PAGE_SIZE, 1,
>>>>>> +                    1, 0, page, NULL);
>>>>>> +        up_read(&current->mm->mmap_sem);
>>>>>> +        if (npages != 1) {
>>>>>> +            get_page(bad_page);
>>>>>> +            return bad_page;
>>>>>> +        }
>>>>>> +        return page[0];
>>>>>>   
>>>>>
>>>>> Wouldn't it be necessary to assign page[0] to slot->phys_mem[gfn - 
>>>>> slot->base_gfn]?
>>>>
>>> sorry, it seems like i missunderstand you in the answer i gave you.
>>> it wouldnt be necessary to assign page[0] to slot->phys_mem[gfn - 
>>> slot->base_gfn], beacuse phys_mem wont have any memory allocate by 
>>> this time.
>>>
>>> with this patch, we are not holding anymore (when using userspace 
>>> allocation) array of all the memory at phys_mem.
>>> beacuse now that the pages are swappable, the physical address 
>>> pointed by the virtual address all the time change (for example when 
>>> swapping happn)
>>> so no one promise us that slot->phys_mem[gfn - slot->base_gfn] will 
>>> really point to page holding the gfn page.
>>>
>>> so what we did, is throw away the phys_mem array (also nice beacuse 
>>> it waste less ram), and at runtime we are getting the pages by using 
>>> the virtual address
>>> beacuse the reference of the page get increased, it promised us that 
>>> untill we release it point to the gfn (release it by doing put_page)
>>>
>>> hope i was more clear this time :)
>>
>> Yes, that makes sense!
>>
>> I wonder if there's a more elegant way dealing with older 
>> userspaces.  For instance, is there any reason why we can allocate a 
>> userspace memory region on behalf of userspace.  That way swap would 
>> even work with older userspaces.
> if we can do that, yes swap will work on older userspace.

I think it's just a matter of calling do_mmap() with the appropriate 
parameters.  It looks like some drivers call do_mmap() directly.

Regards,

Anthony Liguori

>> Regards,
>>
>> Anthony Liguori
>>
>>
>
>


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                         ` <4711563D.8020000-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
@ 2007-10-13 23:38                           ` Izik Eidus
       [not found]                             ` <471156D9.3030700-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  2007-10-14  6:02                           ` Avi Kivity
  1 sibling, 1 reply; 19+ messages in thread
From: Izik Eidus @ 2007-10-13 23:38 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Anthony Liguori wrote:
> Izik Eidus wrote:
>> Anthony Liguori wrote:
>>> Izik Eidus wrote:
>>>> Izik Eidus wrote:
>>>>> Anthony Liguori wrote:
>>>>>> Izik Eidus wrote:
>>>>>>>
>>>>>>> @@ -1058,8 +1038,27 @@ struct page *gfn_to_page(struct kvm *kvm, 
>>>>>>> gfn_t gfn)
>>>>>>>  
>>>>>>>      gfn = unalias_gfn(kvm, gfn);
>>>>>>>      slot = __gfn_to_memslot(kvm, gfn);
>>>>>>> -    if (!slot)
>>>>>>> +    if (!slot) {
>>>>>>> +        get_page(bad_page);
>>>>>>>          return bad_page;
>>>>>>> +    }
>>>>>>> +    if (slot->user_alloc) {
>>>>>>> +        struct page *page[1];
>>>>>>> +        int npages;
>>>>>>> +
>>>>>>> +        down_read(&current->mm->mmap_sem);
>>>>>>> +        npages = get_user_pages(current, current->mm,
>>>>>>> +                    slot->userspace_addr
>>>>>>> +                    + (gfn - slot->base_gfn) * PAGE_SIZE, 1,
>>>>>>> +                    1, 0, page, NULL);
>>>>>>> +        up_read(&current->mm->mmap_sem);
>>>>>>> +        if (npages != 1) {
>>>>>>> +            get_page(bad_page);
>>>>>>> +            return bad_page;
>>>>>>> +        }
>>>>>>> +        return page[0];
>>>>>>>   
>>>>>>
>>>>>> Wouldn't it be necessary to assign page[0] to slot->phys_mem[gfn 
>>>>>> - slot->base_gfn]?
>>>>>
>>>> sorry, it seems like i missunderstand you in the answer i gave you.
>>>> it wouldnt be necessary to assign page[0] to slot->phys_mem[gfn - 
>>>> slot->base_gfn], beacuse phys_mem wont have any memory allocate by 
>>>> this time.
>>>>
>>>> with this patch, we are not holding anymore (when using userspace 
>>>> allocation) array of all the memory at phys_mem.
>>>> beacuse now that the pages are swappable, the physical address 
>>>> pointed by the virtual address all the time change (for example 
>>>> when swapping happn)
>>>> so no one promise us that slot->phys_mem[gfn - slot->base_gfn] will 
>>>> really point to page holding the gfn page.
>>>>
>>>> so what we did, is throw away the phys_mem array (also nice beacuse 
>>>> it waste less ram), and at runtime we are getting the pages by 
>>>> using the virtual address
>>>> beacuse the reference of the page get increased, it promised us 
>>>> that untill we release it point to the gfn (release it by doing 
>>>> put_page)
>>>>
>>>> hope i was more clear this time :)
>>>
>>> Yes, that makes sense!
>>>
>>> I wonder if there's a more elegant way dealing with older 
>>> userspaces.  For instance, is there any reason why we can allocate a 
>>> userspace memory region on behalf of userspace.  That way swap would 
>>> even work with older userspaces.
>> if we can do that, yes swap will work on older userspace.
>
> I think it's just a matter of calling do_mmap() with the appropriate 
> parameters.  It looks likes there's some drivers call do_mmap() directly.
>
Yeah, I think you're right — this is an excellent idea! When we merge the 
swapping support into KVM, we will add swapping support for older userspaces.

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                             ` <471156D9.3030700-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-10-14  0:36                               ` Anthony Liguori
       [not found]                                 ` <47116486.4070609-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
  0 siblings, 1 reply; 19+ messages in thread
From: Anthony Liguori @ 2007-10-14  0:36 UTC (permalink / raw)
  To: Izik Eidus; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

[-- Attachment #1: Type: text/plain, Size: 762 bytes --]

Izik Eidus wrote:
> Anthony Liguori wrote:
>>
>> I think it's just a matter of calling do_mmap() with the appropriate 
>> parameters.  It looks likes there's some drivers call do_mmap() 
>> directly.
>>
> yea, i think you right, this is excellent idea!, when we will merge 
> the swapping to kvm, we will add swapping support to older userspace.

Here's a patch against your series.  The memset in kvmctl ends up making 
the guest use all physical memory to start off with but I did confirm 
that once the system is under memory pressure, the guest's memory 
becomes swappable.  Of course, it's quite painful :-)

A nice thing though is that a lot of the code becomes a bit cleaner and 
we can eliminate the phys_mem array entirely.

Regards,

Anthony Liguori



[-- Attachment #2: swap-old-userspace.diff --]
[-- Type: text/x-patch, Size: 4754 bytes --]

Subject: [PATCH] Allocate userspace memory for older userspace
From: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Cc: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
Cc: Izik Eidus <izike-atKUWr5tajBWk0Htik3J/w@public.gmane.org>

Allocate a userspace buffer for older userspaces.  Also eliminate phys_mem
buffer.  The memset() in kvmctl really kills initial memory usage but swapping
does even with old userspaces.

Signed-off-by: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 74b427f..c904ea3 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -405,10 +405,8 @@ struct kvm_memory_slot {
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long flags;
-	struct page **phys_mem;
 	unsigned long *rmap;
 	unsigned long *dirty_bitmap;
-	int user_alloc; /* user allocated memory */
 	unsigned long userspace_addr;
 };
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index f58d49b..aa5666f 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/profile.h>
 #include <linux/kvm_para.h>
 #include <linux/pagemap.h>
+#include <linux/mman.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -319,34 +320,18 @@ static struct kvm *kvm_create_vm(void)
 	return kvm;
 }
 
-static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
-{
-	int i;
-
-	for (i = 0; i < free->npages; ++i)
-		if (free->phys_mem[i])
-			__free_page(free->phys_mem[i]);
-}
-
 /*
  * Free any memory in @free but not in @dont.
  */
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
-	if (!dont || free->phys_mem != dont->phys_mem)
-		if (free->phys_mem) {
-			if (!free->user_alloc)
-				kvm_free_kernel_physmem(free);
-			vfree(free->phys_mem);
-		}
 	if (!dont || free->rmap != dont->rmap)
 		vfree(free->rmap);
 
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		vfree(free->dirty_bitmap);
 
-	free->phys_mem = NULL;
 	free->npages = 0;
 	free->dirty_bitmap = NULL;
 }
@@ -731,10 +716,6 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 			goto out_unlock;
 	}
 
-	/* Deallocate if slot is being removed */
-	if (!npages)
-		new.phys_mem = NULL;
-
 	/* Free page dirty bitmap if unneeded */
 	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
 		new.dirty_bitmap = NULL;
@@ -742,29 +723,27 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 	r = -ENOMEM;
 
 	/* Allocate if a slot is being created */
-	if (npages && !new.phys_mem) {
-		new.phys_mem = vmalloc(npages * sizeof(struct page *));
-
-		if (!new.phys_mem)
-			goto out_unlock;
-
+	if (npages) {
 		new.rmap = vmalloc(npages * sizeof(struct page *));
 
 		if (!new.rmap)
 			goto out_unlock;
 
-		memset(new.phys_mem, 0, npages * sizeof(struct page *));
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
-		if (user_alloc) {
-			new.user_alloc = 1;
+
+		if (user_alloc)
 			new.userspace_addr = mem->userspace_addr;
-		} else {
-			for (i = 0; i < npages; ++i) {
-				new.phys_mem[i] = alloc_page(GFP_HIGHUSER
-							     | __GFP_ZERO);
-				if (!new.phys_mem[i])
-					goto out_unlock;
-			}
+		else {
+			down_write(&current->mm->mmap_sem);
+			new.userspace_addr = do_mmap(NULL, 0,
+						     npages * PAGE_SIZE,
+						     PROT_READ | PROT_WRITE,
+						     MAP_SHARED | MAP_ANONYMOUS,
+						     0);
+			up_write(&current->mm->mmap_sem);
+
+			if (new.userspace_addr > -1024UL)
+				goto out_unlock;
 		}
 	}
 
@@ -1029,6 +1008,8 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
+	struct page *page[1];
+	int npages;
 
 	gfn = unalias_gfn(kvm, gfn);
 	slot = __gfn_to_memslot(kvm, gfn);
@@ -1036,24 +1017,19 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 		get_page(bad_page);
 		return bad_page;
 	}
-	if (slot->user_alloc) {
-		struct page *page[1];
-		int npages;
-
-		down_read(&current->mm->mmap_sem);
-		npages = get_user_pages(current, current->mm,
-					slot->userspace_addr
-					+ (gfn - slot->base_gfn) * PAGE_SIZE, 1,
-					1, 0, page, NULL);
-		up_read(&current->mm->mmap_sem);
-		if (npages != 1) {
-			get_page(bad_page);
-			return bad_page;
-		}
-		return page[0];
+
+	down_read(&current->mm->mmap_sem);
+	npages = get_user_pages(current, current->mm,
+				slot->userspace_addr
+				+ (gfn - slot->base_gfn) * PAGE_SIZE, 1,
+				1, 0, page, NULL);
+	up_read(&current->mm->mmap_sem);
+	if (npages != 1) {
+		get_page(bad_page);
+		return bad_page;
 	}
-	get_page(slot->phys_mem[gfn - slot->base_gfn]);
-	return slot->phys_mem[gfn - slot->base_gfn];
+
+	return page[0];
 }
 EXPORT_SYMBOL_GPL(gfn_to_page);
 

[-- Attachment #3: Type: text/plain, Size: 314 bytes --]

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

[-- Attachment #4: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                         ` <4711563D.8020000-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
  2007-10-13 23:38                           ` Izik Eidus
@ 2007-10-14  6:02                           ` Avi Kivity
       [not found]                             ` <4711B101.7070305-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  1 sibling, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2007-10-14  6:02 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Anthony Liguori wrote:
> Izik Eidus wrote:
>   
>> Anthony Liguori wrote:
>>     
>>> Izik Eidus wrote:
>>>       
>>>> Izik Eidus wrote:
>>>>         
>>>>> Anthony Liguori wrote:
>>>>>           
>>>>>> Izik Eidus wrote:
>>>>>>             
>>>>>>> @@ -1058,8 +1038,27 @@ struct page *gfn_to_page(struct kvm *kvm, 
>>>>>>> gfn_t gfn)
>>>>>>>  
>>>>>>>      gfn = unalias_gfn(kvm, gfn);
>>>>>>>      slot = __gfn_to_memslot(kvm, gfn);
>>>>>>> -    if (!slot)
>>>>>>> +    if (!slot) {
>>>>>>> +        get_page(bad_page);
>>>>>>>          return bad_page;
>>>>>>> +    }
>>>>>>> +    if (slot->user_alloc) {
>>>>>>> +        struct page *page[1];
>>>>>>> +        int npages;
>>>>>>> +
>>>>>>> +        down_read(&current->mm->mmap_sem);
>>>>>>> +        npages = get_user_pages(current, current->mm,
>>>>>>> +                    slot->userspace_addr
>>>>>>> +                    + (gfn - slot->base_gfn) * PAGE_SIZE, 1,
>>>>>>> +                    1, 0, page, NULL);
>>>>>>> +        up_read(&current->mm->mmap_sem);
>>>>>>> +        if (npages != 1) {
>>>>>>> +            get_page(bad_page);
>>>>>>> +            return bad_page;
>>>>>>> +        }
>>>>>>> +        return page[0];
>>>>>>>   
>>>>>>>               
>>>>>> Wouldn't it be necessary to assign page[0] to slot->phys_mem[gfn - 
>>>>>> slot->base_gfn]?
>>>>>>             
>>>> sorry, it seems like i missunderstand you in the answer i gave you.
>>>> it wouldnt be necessary to assign page[0] to slot->phys_mem[gfn - 
>>>> slot->base_gfn], beacuse phys_mem wont have any memory allocate by 
>>>> this time.
>>>>
>>>> with this patch, we are not holding anymore (when using userspace 
>>>> allocation) array of all the memory at phys_mem.
>>>> beacuse now that the pages are swappable, the physical address 
>>>> pointed by the virtual address all the time change (for example when 
>>>> swapping happn)
>>>> so no one promise us that slot->phys_mem[gfn - slot->base_gfn] will 
>>>> really point to page holding the gfn page.
>>>>
>>>> so what we did, is throw away the phys_mem array (also nice beacuse 
>>>> it waste less ram), and at runtime we are getting the pages by using 
>>>> the virtual address
>>>> beacuse the reference of the page get increased, it promised us that 
>>>> untill we release it point to the gfn (release it by doing put_page)
>>>>
>>>> hope i was more clear this time :)
>>>>         
>>> Yes, that makes sense!
>>>
>>> I wonder if there's a more elegant way dealing with older 
>>> userspaces.  For instance, is there any reason why we can allocate a 
>>> userspace memory region on behalf of userspace.  That way swap would 
>>> even work with older userspaces.
>>>       
>> if we can do that, yes swap will work on older userspace.
>>     
>
> I think it's just a matter of calling do_mmap() with the appropriate 
> parameters.  It looks likes there's some drivers call do_mmap() directly.
>
>   

This will halve the maximum size of virtual machines on i386 since
userspace will also mmap() the memory, and the virtual address space is
restricted to 3GB.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                                 ` <47116486.4070609-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
@ 2007-10-14  6:12                                   ` Izik Eidus
  2007-10-15 19:26                                   ` Anthony Liguori
  1 sibling, 0 replies; 19+ messages in thread
From: Izik Eidus @ 2007-10-14  6:12 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Anthony Liguori wrote:
> Izik Eidus wrote:
>> Anthony Liguori wrote:
>>>
>>> I think it's just a matter of calling do_mmap() with the appropriate 
>>> parameters.  It looks likes there's some drivers call do_mmap() 
>>> directly.
>>>
>> yea, i think you're right, this is an excellent idea! when we merge 
>> the swapping into kvm, we will add swapping support to older userspace.
>
> Here's a patch against your series.  The memset in kvmctl ends up 
> making the guest use all physical memory to start off with but I did 
> confirm that once the system is under memory pressure, the guest's 
> memory becomes swappable.  Of course, it's quite painful :-)
>
> A nice thing though is that a lot of the code becomes a bit cleaner 
> and we can eliminate the phys_mem array entirely.
>
> Regards,
>
> Anthony Liguori
>
>
great, this sound good, i will test it as soon as i can :)

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                             ` <4711B101.7070305-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-10-14  7:46                               ` Avi Kivity
  2007-10-14 14:57                               ` Anthony Liguori
  1 sibling, 0 replies; 19+ messages in thread
From: Avi Kivity @ 2007-10-14  7:46 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Avi Kivity wrote:
>>>> I wonder if there's a more elegant way dealing with older 
>>>> userspaces.  For instance, is there any reason why we can allocate a 
>>>> userspace memory region on behalf of userspace.  That way swap would 
>>>> even work with older userspaces.
>>>>       
>>>>         
>>> if we can do that, yes swap will work on older userspace.
>>>     
>>>       
>> I think it's just a matter of calling do_mmap() with the appropriate 
>> parameters.  It looks likes there's some drivers call do_mmap() directly.
>>
>>   
>>     
>
> This will halve the maximum size of virtual machines on i386 since
> userspace will also mmap() the memory, and the virtual address space is
> restricted to 3GB.
>
>   

Upon further reflection, considering that this will only be generally
available in a kernel in six months (2.6.25) and that new userspace has
already been released, and that only i386 is affected, the
simplification may be worthwhile.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                             ` <4711B101.7070305-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  2007-10-14  7:46                               ` Avi Kivity
@ 2007-10-14 14:57                               ` Anthony Liguori
       [not found]                                 ` <47122E57.7060308-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
  1 sibling, 1 reply; 19+ messages in thread
From: Anthony Liguori @ 2007-10-14 14:57 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Avi Kivity wrote:
> Anthony Liguori wrote:
>   
>> I think it's just a matter of calling do_mmap() with the appropriate 
>> parameters.  It looks likes there's some drivers call do_mmap() directly.
>>
>>   
>>     
>
> This will halve the maximum size of virtual machines on i386 since
> userspace will also mmap() the memory, and the virtual address space is
> restricted to 3GB.
>   

I wonder if there is a way to force the mmap() to return the userspace 
address we previously allocated.

Regards,

Anthony Liguori


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                                 ` <47122E57.7060308-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
@ 2007-10-14 16:08                                   ` Avi Kivity
       [not found]                                     ` <47123EFA.1010808-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2007-10-14 16:08 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Anthony Liguori wrote:
> Avi Kivity wrote:
>> Anthony Liguori wrote:
>>  
>>> I think it's just a matter of calling do_mmap() with the appropriate 
>>> parameters.  It looks likes there's some drivers call do_mmap() 
>>> directly.
>>>
>>>       
>>
>> This will halve the maximum size of virtual machines on i386 since
>> userspace will also mmap() the memory, and the virtual address space is
>> restricted to 3GB.
>>   
>
> I wonder if there is a way to force the mmap() to return the userspace 
> address we previously allocated.
>

Yes:

>
> unsigned long
> get_unmapped_area(struct file *file, unsigned long addr, unsigned long 
> len,
>                 unsigned long pgoff, unsigned long flags)
> {
>         unsigned long (*get_area)(struct file *, unsigned long,
>                                   unsigned long, unsigned long, 
> unsigned long);
>
>         get_area = current->mm->get_unmapped_area;
>         if (file && file->f_op && file->f_op->get_unmapped_area)
>                 get_area = file->f_op->get_unmapped_area;
>         addr = get_area(file, addr, len, pgoff, flags);


-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                                     ` <47123EFA.1010808-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-10-14 17:11                                       ` Anthony Liguori
       [not found]                                         ` <47124DBB.9030708-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
  0 siblings, 1 reply; 19+ messages in thread
From: Anthony Liguori @ 2007-10-14 17:11 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Avi Kivity wrote:
> Anthony Liguori wrote:
>> Avi Kivity wrote:
>>> Anthony Liguori wrote:
>>>  
>>>> I think it's just a matter of calling do_mmap() with the 
>>>> appropriate parameters.  It looks likes there's some drivers call 
>>>> do_mmap() directly.
>>>>
>>>>       
>>>
>>> This will halve the maximum size of virtual machines on i386 since
>>> userspace will also mmap() the memory, and the virtual address space is
>>> restricted to 3GB.
>>>   
>>
>> I wonder if there is a way to force the mmap() to return the 
>> userspace address we previously allocated.
>>
>
> Yes:
>
>>
>> unsigned long
>> get_unmapped_area(struct file *file, unsigned long addr, unsigned 
>> long len,
>>                 unsigned long pgoff, unsigned long flags)
>> {
>>         unsigned long (*get_area)(struct file *, unsigned long,
>>                                   unsigned long, unsigned long, 
>> unsigned long);
>>
>>         get_area = current->mm->get_unmapped_area;
>>         if (file && file->f_op && file->f_op->get_unmapped_area)
>>                 get_area = file->f_op->get_unmapped_area;
>>         addr = get_area(file, addr, len, pgoff, flags);

Hrm, so you can return the area that should be used but you still 
install vma_ops?  Wouldn't that result in an infinite loop when 
userspace attempted to map the area since we'll try to satisfy the 
nopage handler by doing a get_user_pages?

Regards,

Anthony Liguori



-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                                         ` <47124DBB.9030708-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
@ 2007-10-14 17:16                                           ` Avi Kivity
  0 siblings, 0 replies; 19+ messages in thread
From: Avi Kivity @ 2007-10-14 17:16 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Anthony Liguori wrote:
> Avi Kivity wrote:
>> Anthony Liguori wrote:
>>> Avi Kivity wrote:
>>>> Anthony Liguori wrote:
>>>>  
>>>>> I think it's just a matter of calling do_mmap() with the 
>>>>> appropriate parameters.  It looks likes there's some drivers call 
>>>>> do_mmap() directly.
>>>>>
>>>>>       
>>>>
>>>> This will halve the maximum size of virtual machines on i386 since
>>>> userspace will also mmap() the memory, and the virtual address 
>>>> space is
>>>> restricted to 3GB.
>>>>   
>>>
>>> I wonder if there is a way to force the mmap() to return the 
>>> userspace address we previously allocated.
>>>
>>
>> Yes:
>>
>>>
>>> unsigned long
>>> get_unmapped_area(struct file *file, unsigned long addr, unsigned 
>>> long len,
>>>                 unsigned long pgoff, unsigned long flags)
>>> {
>>>         unsigned long (*get_area)(struct file *, unsigned long,
>>>                                   unsigned long, unsigned long, 
>>> unsigned long);
>>>
>>>         get_area = current->mm->get_unmapped_area;
>>>         if (file && file->f_op && file->f_op->get_unmapped_area)
>>>                 get_area = file->f_op->get_unmapped_area;
>>>         addr = get_area(file, addr, len, pgoff, flags);
>
> Hrm, so you can return the area that should be used but you still 
> install vma_ops?  Wouldn't that result in an infinite loop when 
> userspace attempted to map the area since we'll try to satisfy the 
> nopage handler by doing a get_user_pages?

->nopage() should refer to the internal array in this case.

btw, while it seems that it is possible to override addr, it is not at 
all certain that it is legal to return a previously mapped area.

However, even if it isn't workable, I'm willing to live with the halved 
guest size limitation, since it's only for six month old userspace 
running large guests on i386.

-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                                 ` <47116486.4070609-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
  2007-10-14  6:12                                   ` Izik Eidus
@ 2007-10-15 19:26                                   ` Anthony Liguori
       [not found]                                     ` <4713BED9.7000302-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
  1 sibling, 1 reply; 19+ messages in thread
From: Anthony Liguori @ 2007-10-15 19:26 UTC (permalink / raw)
  To: Izik Eidus; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

[-- Attachment #1: Type: text/plain, Size: 1319 bytes --]

Anthony Liguori wrote:
> Izik Eidus wrote:
>> Anthony Liguori wrote:
>>>
>>> I think it's just a matter of calling do_mmap() with the appropriate 
>>> parameters.  It looks likes there's some drivers call do_mmap() 
>>> directly.
>>>
>> yea, i think you right, this is excellent idea!, when we will merge 
>> the swapping to kvm, we will add swapping support to older userspace.
>
> Here's a patch against your series.  The memset in kvmctl ends up 
> making the guest use all physical memory to start off with but I did 
> confirm that once the system is under memory pressure, the guest's 
> memory becomes swappable.  Of course, it's quite painful :-)
>
> A nice thing though is that a lot of the code becomes a bit cleaner 
> and we can eliminate the phys_mem array entirely.

>      /* Allocate if a slot is being created */
> -    if (npages && !new.phys_mem) {
> -        new.phys_mem = vmalloc(npages * sizeof(struct page *));
> -
> -        if (!new.phys_mem)
> -            goto out_unlock;
> -
> +    if (npages) {

This is wrong apparently.  We needed to have new.phys_mem to indicate 
whether the slot has been created yet or not.  The new attached patch 
uses new.rmap instead of new.phys_mem to accomplish the same goal.

I no longer see the BUG() that I was seeing.

> Regards,
>
> Anthony Liguori
>
>


[-- Attachment #2: swap-old-userspace.diff --]
[-- Type: text/x-patch, Size: 4883 bytes --]

Subject: [PATCH] Allocate userspace memory for older userspace (v2)
From: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Cc: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
Cc: Izik Eidus <izike-atKUWr5tajBWk0Htik3J/w@public.gmane.org>

Allocate a userspace buffer for older userspaces.  Also eliminate phys_mem
buffer.  The memset() in kvmctl really kills initial memory usage but swapping
does even with old userspaces.

Since v1, fixed a bug in slot creation.

Signed-off-by: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 74b427f..c904ea3 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -405,10 +405,8 @@ struct kvm_memory_slot {
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long flags;
-	struct page **phys_mem;
 	unsigned long *rmap;
 	unsigned long *dirty_bitmap;
-	int user_alloc; /* user allocated memory */
 	unsigned long userspace_addr;
 };
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index f58d49b..17d3c7d 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/profile.h>
 #include <linux/kvm_para.h>
 #include <linux/pagemap.h>
+#include <linux/mman.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -319,36 +320,21 @@ static struct kvm *kvm_create_vm(void)
 	return kvm;
 }
 
-static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
-{
-	int i;
-
-	for (i = 0; i < free->npages; ++i)
-		if (free->phys_mem[i])
-			__free_page(free->phys_mem[i]);
-}
-
 /*
  * Free any memory in @free but not in @dont.
  */
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
-	if (!dont || free->phys_mem != dont->phys_mem)
-		if (free->phys_mem) {
-			if (!free->user_alloc)
-				kvm_free_kernel_physmem(free);
-			vfree(free->phys_mem);
-		}
 	if (!dont || free->rmap != dont->rmap)
 		vfree(free->rmap);
 
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		vfree(free->dirty_bitmap);
 
-	free->phys_mem = NULL;
 	free->npages = 0;
 	free->dirty_bitmap = NULL;
+	free->rmap = NULL;
 }
 
 static void kvm_free_physmem(struct kvm *kvm)
@@ -731,10 +717,6 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 			goto out_unlock;
 	}
 
-	/* Deallocate if slot is being removed */
-	if (!npages)
-		new.phys_mem = NULL;
-
 	/* Free page dirty bitmap if unneeded */
 	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
 		new.dirty_bitmap = NULL;
@@ -742,29 +724,27 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 	r = -ENOMEM;
 
 	/* Allocate if a slot is being created */
-	if (npages && !new.phys_mem) {
-		new.phys_mem = vmalloc(npages * sizeof(struct page *));
-
-		if (!new.phys_mem)
-			goto out_unlock;
-
+	if (npages && !new.rmap) {
 		new.rmap = vmalloc(npages * sizeof(struct page *));
 
 		if (!new.rmap)
 			goto out_unlock;
 
-		memset(new.phys_mem, 0, npages * sizeof(struct page *));
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
-		if (user_alloc) {
-			new.user_alloc = 1;
+
+		if (user_alloc)
 			new.userspace_addr = mem->userspace_addr;
-		} else {
-			for (i = 0; i < npages; ++i) {
-				new.phys_mem[i] = alloc_page(GFP_HIGHUSER
-							     | __GFP_ZERO);
-				if (!new.phys_mem[i])
-					goto out_unlock;
-			}
+		else {
+			down_write(&current->mm->mmap_sem);
+			new.userspace_addr = do_mmap(NULL, 0,
+						     npages * PAGE_SIZE,
+						     PROT_READ | PROT_WRITE,
+						     MAP_SHARED | MAP_ANONYMOUS,
+						     0);
+			up_write(&current->mm->mmap_sem);
+
+			if (new.userspace_addr > -1024UL)
+				goto out_unlock;
 		}
 	}
 
@@ -1029,6 +1009,8 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
+	struct page *page[1];
+	int npages;
 
 	gfn = unalias_gfn(kvm, gfn);
 	slot = __gfn_to_memslot(kvm, gfn);
@@ -1036,24 +1018,19 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 		get_page(bad_page);
 		return bad_page;
 	}
-	if (slot->user_alloc) {
-		struct page *page[1];
-		int npages;
-
-		down_read(&current->mm->mmap_sem);
-		npages = get_user_pages(current, current->mm,
-					slot->userspace_addr
-					+ (gfn - slot->base_gfn) * PAGE_SIZE, 1,
-					1, 0, page, NULL);
-		up_read(&current->mm->mmap_sem);
-		if (npages != 1) {
-			get_page(bad_page);
-			return bad_page;
-		}
-		return page[0];
+
+	down_read(&current->mm->mmap_sem);
+	npages = get_user_pages(current, current->mm,
+				slot->userspace_addr
+				+ (gfn - slot->base_gfn) * PAGE_SIZE, 1,
+				1, 0, page, NULL);
+	up_read(&current->mm->mmap_sem);
+	if (npages != 1) {
+		get_page(bad_page);
+		return bad_page;
 	}
-	get_page(slot->phys_mem[gfn - slot->base_gfn]);
-	return slot->phys_mem[gfn - slot->base_gfn];
+
+	return page[0];
 }
 EXPORT_SYMBOL_GPL(gfn_to_page);
 

[-- Attachment #3: Type: text/plain, Size: 314 bytes --]

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

[-- Attachment #4: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                                     ` <4713BED9.7000302-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
@ 2007-10-15 20:01                                       ` Izik Eidus
       [not found]                                         ` <4713C700.5010304-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 19+ messages in thread
From: Izik Eidus @ 2007-10-15 20:01 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Anthony Liguori wrote:
> Anthony Liguori wrote:
>> Izik Eidus wrote:
>>> Anthony Liguori wrote:
>>>>
>>>> I think it's just a matter of calling do_mmap() with the 
>>>> appropriate parameters.  It looks likes there's some drivers call 
>>>> do_mmap() directly.
>>>>
>>> yea, i think you right, this is excellent idea!, when we will merge 
>>> the swapping to kvm, we will add swapping support to older userspace.
>>
>> Here's a patch against your series.  The memset in kvmctl ends up 
>> making the guest use all physical memory to start off with but I did 
>> confirm that once the system is under memory pressure, the guest's 
>> memory becomes swappable.  Of course, it's quite painful :-)
>>
>> A nice thing though is that a lot of the code becomes a bit cleaner 
>> and we can eliminate the phys_mem array entirely.
>
>>      /* Allocate if a slot is being created */
>> -    if (npages && !new.phys_mem) {
>> -        new.phys_mem = vmalloc(npages * sizeof(struct page *));
>> -
>> -        if (!new.phys_mem)
>> -            goto out_unlock;
>> -
>> +    if (npages) {
>
> This is wrong apparently.  We needed to have new.phys_mem to indicate 
> whether the slot has been created yet or not.  The new attached patch 
> uses new.rmap instead of new.phys_mem to accomplish the same goal.
>
this patch looks good, and indeed makes everything simpler.
but why didn't you use new.phys_mem ?
> I no longer see the BUG() that I was seeing.
>
>> Regards,
>>
>> Anthony Liguori
>>
>>
>


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/4] Swapping
       [not found]                                         ` <4713C700.5010304-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-10-15 20:03                                           ` Izik Eidus
  0 siblings, 0 replies; 19+ messages in thread
From: Izik Eidus @ 2007-10-15 20:03 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Izik Eidus wrote:
> Anthony Liguori wrote:
>> Anthony Liguori wrote:
>>> Izik Eidus wrote:
>>>> Anthony Liguori wrote:
>>>>>
>>>>> I think it's just a matter of calling do_mmap() with the 
>>>>> appropriate parameters.  It looks likes there's some drivers call 
>>>>> do_mmap() directly.
>>>>>
>>>> yea, i think you right, this is excellent idea!, when we will merge 
>>>> the swapping to kvm, we will add swapping support to older userspace.
>>>
>>> Here's a patch against your series.  The memset in kvmctl ends up 
>>> making the guest use all physical memory to start off with but I did 
>>> confirm that once the system is under memory pressure, the guest's 
>>> memory becomes swappable.  Of course, it's quite painful :-)
>>>
>>> A nice thing though is that a lot of the code becomes a bit cleaner 
>>> and we can eliminate the phys_mem array entirely.
>>
>>>      /* Allocate if a slot is being created */
>>> -    if (npages && !new.phys_mem) {
>>> -        new.phys_mem = vmalloc(npages * sizeof(struct page *));
>>> -
>>> -        if (!new.phys_mem)
>>> -            goto out_unlock;
>>> -
>>> +    if (npages) {
>>
>> This is wrong apparently.  We needed to have new.phys_mem to indicate 
>> whether the slot has been created yet or not.  The new attached patch 
>> uses new.rmap instead of new.phys_mem to accomplish the same goal.
>>
> this patch look good, and indeed make all the things simple.
> but why you didnt use new.phys_mem ?
>> I no longer see the BUG() that I was seeing.
>>
>>> Regards,
>>>
>>> Anthony Liguori
>>>
>>>
>>


upsss
ignore me:)
i am idiot: :)
>
>


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2007-10-15 20:03 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-10-13  2:10 [PATCH 3/4] Swapping Izik Eidus
     [not found] ` <47102919.6070802-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-13 20:04   ` Anthony Liguori
     [not found]     ` <471124D4.3090901-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
2007-10-13 20:13       ` Izik Eidus
     [not found]         ` <471126D9.4030204-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-13 20:41           ` Izik Eidus
     [not found]             ` <47112D66.4020500-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-13 23:17               ` Anthony Liguori
     [not found]                 ` <47115207.3090909-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
2007-10-13 23:26                   ` Izik Eidus
     [not found]                     ` <4711542F.2060306-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-13 23:35                       ` Anthony Liguori
     [not found]                         ` <4711563D.8020000-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
2007-10-13 23:38                           ` Izik Eidus
     [not found]                             ` <471156D9.3030700-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-14  0:36                               ` Anthony Liguori
     [not found]                                 ` <47116486.4070609-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
2007-10-14  6:12                                   ` Izik Eidus
2007-10-15 19:26                                   ` Anthony Liguori
     [not found]                                     ` <4713BED9.7000302-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
2007-10-15 20:01                                       ` Izik Eidus
     [not found]                                         ` <4713C700.5010304-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-15 20:03                                           ` Izik Eidus
2007-10-14  6:02                           ` Avi Kivity
     [not found]                             ` <4711B101.7070305-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-14  7:46                               ` Avi Kivity
2007-10-14 14:57                               ` Anthony Liguori
     [not found]                                 ` <47122E57.7060308-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
2007-10-14 16:08                                   ` Avi Kivity
     [not found]                                     ` <47123EFA.1010808-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-14 17:11                                       ` Anthony Liguori
     [not found]                                         ` <47124DBB.9030708-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org>
2007-10-14 17:16                                           ` Avi Kivity

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.