All of lore.kernel.org
 help / color / mirror / Atom feed
* (no subject)
@ 2007-08-12 19:59 Izik Eidus
       [not found] ` <64F9B87B6B770947A9F8391472E032160CBECF40-yEcIvxbTEBqsx+V+t5oei8rau4O3wl8o3fe8/T/H7NteoWH0uzbU5w@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Izik Eidus @ 2007-08-12 19:59 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f


[-- Attachment #1.1: Type: text/plain, Size: 422 bytes --]

we are working on swapping support for the guests in kvm.
we want to allow management of the memory swapping of the guests from kvm.

i wrote this patch that moves the guest allocated memory from the kernel to the userspace for better management.
plus in this way it will share more code for such solution with s390 and other archs.

this is request for comment, so any idea you have please write to me.

thanks


[-- Attachment #1.2: Type: text/html, Size: 807 bytes --]

[-- Attachment #2: kvmctl_patch --]
[-- Type: application/octet-stream, Size: 3666 bytes --]

diff --git a/user/kvmctl.c b/user/kvmctl.c
index 43b374d..887cb63 100644
--- a/user/kvmctl.c
+++ b/user/kvmctl.c
@@ -239,12 +239,13 @@ int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
 	int fd = kvm->fd;
 	int zfd;
 	int r;
-	struct kvm_memory_region low_memory = {
+
+	struct kvm_host_memory_region low_memory = {
 		.slot = 3,
 		.memory_size = memory  < dosmem ? memory : dosmem,
 		.guest_phys_addr = 0,
 	};
-	struct kvm_memory_region extended_memory = {
+	struct kvm_host_memory_region extended_memory = {
 		.slot = 0,
 		.memory_size = memory < exmem ? 0 : memory - exmem,
 		.guest_phys_addr = exmem,
@@ -259,14 +260,41 @@ int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
 	}
 	kvm->vm_fd = fd;
 
+	*vm_mem = mmap(NULL, memory, PROT_READ|PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+	if (*vm_mem == MAP_FAILED) {
+		fprintf(stderr, "kvm_create: %s", strerror(errno));
+		return -1;
+	}
+
+	r = munmap(memory + dosmem, exmem - dosmem);
+	if (r == -1) {
+		fprintf(stderr, "kvm_create: %s", strerror(errno));
+		return -1;
+	}
+
+	memset(*vm_mem, 0, dosmem);
+	memset(*vm_mem + exmem, 0, memory - exmem);
+
 	/* 640K should be enough. */
-	r = ioctl(fd, KVM_SET_MEMORY_REGION, &low_memory);
+	low_memory.guest_host_addr = *vm_mem;
+	if (!low_memory.guest_host_addr) {
+		fprintf(stderr, "kvm_create: %s", strerror(errno));
+		return -1;
+	}
+
+	r = ioctl(fd, KVM_SET_USER_MEMORY_REGION, &low_memory);
 	if (r == -1) {
 		fprintf(stderr, "kvm_create_memory_region: %m\n");
 		return -1;
 	}
 	if (extended_memory.memory_size) {
-		r = ioctl(fd, KVM_SET_MEMORY_REGION, &extended_memory);
+		extended_memory.guest_host_addr = *vm_mem + exmem;
+		if (!extended_memory.guest_host_addr) {
+			fprintf(stderr, "kvm_create: %s", strerror(errno));
+			return -1;
+		}
+
+		r = ioctl(fd, KVM_SET_USER_MEMORY_REGION, &extended_memory);
 		if (r == -1) {
 			fprintf(stderr, "kvm_create_memory_region: %m\n");
 			return -1;
@@ -276,11 +304,6 @@ int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
 	kvm_memory_region_save_params(kvm, &low_memory);
 	kvm_memory_region_save_params(kvm, &extended_memory);
 
-	*vm_mem = mmap(NULL, memory, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
-	if (*vm_mem == MAP_FAILED) {
-		fprintf(stderr, "mmap: %m\n");
-		return -1;
-	}
 	kvm->physical_memory = *vm_mem;
 
 	zfd = open("/dev/zero", O_RDONLY);
@@ -291,7 +314,7 @@ int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
 	r = kvm_create_vcpu(kvm, 0);
 	if (r < 0)
 		return r;
-
+	
 	return 0;
 }
 
@@ -302,25 +325,36 @@ void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start,
 	int r;
 	int fd = kvm->vm_fd;
 	int prot = PROT_READ;
-	struct kvm_memory_region memory = {
+	struct kvm_host_memory_region memory = {
 		.slot = slot,
 		.memory_size = len,
 		.guest_phys_addr = phys_start,
 		.flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
 	};
-
-	r = ioctl(fd, KVM_SET_MEMORY_REGION, &memory);
+	
+	if (writable)
+		prot |= PROT_WRITE;
+	
+	ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+	if (ptr == MAP_FAILED) {
+		fprintf(stderr, "kvm_create_phys_mem: %s", strerror(errno));
+		return -1;
+	}
+	
+	memset(ptr, 0, len);
+	
+	memory.guest_host_addr = ptr;
+	if (!memory.guest_host_addr) {
+		fprintf(stderr, "kvm_create_phys_mem: %s", strerror(errno));
+		return 0;
+	}
+	
+	r = ioctl(fd, KVM_SET_USER_MEMORY_REGION, &memory);
 	if (r == -1)
 	    return 0;
 
 	kvm_memory_region_save_params(kvm, &memory);
 
-	if (writable)
-		prot |= PROT_WRITE;
-
-	ptr = mmap(NULL, len, prot, MAP_SHARED, fd, phys_start);
-	if (ptr == MAP_FAILED)
-		return 0;
 	return ptr;
 }
 

[-- Attachment #3: kvm_kernel_patch --]
[-- Type: application/octet-stream, Size: 8168 bytes --]

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index fc27c2f..a94714f 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -415,6 +415,7 @@ struct kvm_memory_slot {
 	unsigned long flags;
 	struct page **phys_mem;
 	unsigned long *dirty_bitmap;
+	int user_alloc; /* user allocated memory */
 };
 
 struct kvm {
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index bc11c2d..1e6fa58 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -37,6 +37,7 @@
 #include <linux/cpumask.h>
 #include <linux/smp.h>
 #include <linux/anon_inodes.h>
+#include <linux/pagemap.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -322,19 +323,40 @@ static int kvm_dev_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
+{
+	int i;
+	
+	for (i = 0; i < free->npages; ++i) {
+		if (free->phys_mem[i]) {
+			if (!PageReserved(free->phys_mem[i]))
+				SetPageDirty(free->phys_mem[i]);
+			page_cache_release(free->phys_mem[i]);
+		}
+	}
+}
+
+static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
+{
+	int i;
+	
+	for (i = 0; i < free->npages; ++i)
+		if (free->phys_mem[i])
+			__free_page(free->phys_mem[i]);
+}
+
 /*
  * Free any memory in @free but not in @dont.
  */
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
-	int i;
-
 	if (!dont || free->phys_mem != dont->phys_mem)
 		if (free->phys_mem) {
-			for (i = 0; i < free->npages; ++i)
-				if (free->phys_mem[i])
-					__free_page(free->phys_mem[i]);
+			if (free->user_alloc)
+				kvm_free_userspace_physmem(free);
+			else
+				kvm_free_kernel_physmem(free);
 			vfree(free->phys_mem);
 		}
 
@@ -670,7 +692,8 @@ EXPORT_SYMBOL_GPL(fx_init);
  * Discontiguous memory is allowed, mostly for framebuffers.
  */
 static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-					  struct kvm_memory_region *mem)
+					  struct kvm_memory_region *mem,
+						unsigned long guest_host_addr)
 {
 	int r;
 	gfn_t base_gfn;
@@ -748,12 +771,26 @@ raced:
 			goto out_free;
 
 		memset(new.phys_mem, 0, npages * sizeof(struct page *));
-		for (i = 0; i < npages; ++i) {
-			new.phys_mem[i] = alloc_page(GFP_HIGHUSER
-						     | __GFP_ZERO);
-			if (!new.phys_mem[i])
+		
+		if (guest_host_addr) {
+			unsigned long pages_num;
+			
+			new.user_alloc = 1;
+			down_read(&current->mm->mmap_sem);
+			pages_num = get_user_pages(current, current->mm, guest_host_addr,
+								npages, 1, 0, new.phys_mem, NULL);
+			up_read(&current->mm->mmap_sem);
+			if (pages_num != npages)
 				goto out_free;
-			set_page_private(new.phys_mem[i],0);
+		} else {
+			new.user_alloc = 0;
+			for (i = 0; i < npages; ++i) {
+				new.phys_mem[i] = alloc_page(GFP_HIGHUSER
+							     | __GFP_ZERO);
+				if (!new.phys_mem[i])
+					goto out_free;
+				set_page_private(new.phys_mem[i],0);
+			}
 		}
 	}
 
@@ -2772,10 +2809,32 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = -EFAULT;
 		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
 			goto out;
-		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem);
+		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem, 0);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_SET_USER_MEMORY_REGION: {
+		struct kvm_memory_region kvm_mem;
+		struct kvm_host_memory_region kvm_host_mem;
+		r = -EFAULT;
+		if (copy_from_user(&kvm_host_mem, argp, sizeof kvm_host_mem))
+			goto out;
+
+		if (!kvm_host_mem.guest_host_addr)
+			goto out;		
+
+		kvm_mem.slot = kvm_host_mem.slot;
+		kvm_mem.flags = kvm_host_mem.flags;
+		kvm_mem.guest_phys_addr = kvm_host_mem.guest_phys_addr;
+		kvm_mem.memory_size = kvm_host_mem.memory_size;
+		
+		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem,
+					kvm_host_mem.guest_host_addr);
 		if (r)
 			goto out;
 		break;
+
 	}
 	case KVM_GET_DIRTY_LOG: {
 		struct kvm_dirty_log log;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 91a446f..e53e70e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -23,6 +23,15 @@ struct kvm_memory_region {
 	__u64 memory_size; /* bytes */
 };
 
+/* for KVM_SET_USER_MEMORY_REGION */
+struct kvm_host_memory_region {
+	__u32 slot;
+	__u32 flags;
+	__u64 guest_phys_addr;
+	__u64 memory_size; /* bytes */
+	__u64 guest_host_addr; /* host userspace allocated memory address */
+};
+
 /* for kvm_memory_region::flags */
 #define KVM_MEM_LOG_DIRTY_PAGES  1UL
 
@@ -254,47 +263,50 @@ struct kvm_signal_mask {
 /*
  * ioctls for /dev/kvm fds:
  */
-#define KVM_GET_API_VERSION       _IO(KVMIO,   0x00)
-#define KVM_CREATE_VM             _IO(KVMIO,   0x01) /* returns a VM fd */
-#define KVM_GET_MSR_INDEX_LIST    _IOWR(KVMIO, 0x02, struct kvm_msr_list)
+#define KVM_GET_API_VERSION        _IO(KVMIO,   0x00)
+#define KVM_CREATE_VM              _IO(KVMIO,   0x01) /* returns a VM fd */
+#define KVM_GET_MSR_INDEX_LIST     _IOWR(KVMIO, 0x02, struct kvm_msr_list)
 /*
  * Check if a kvm extension is available.  Argument is extension number,
  * return is 1 (yes) or 0 (no, sorry).
  */
-#define KVM_CHECK_EXTENSION       _IO(KVMIO,   0x03)
+#define KVM_CHECK_EXTENSION        _IO(KVMIO,   0x03)
 /*
  * Get size for mmap(vcpu_fd)
  */
-#define KVM_GET_VCPU_MMAP_SIZE    _IO(KVMIO,   0x04) /* in bytes */
+#define KVM_GET_VCPU_MMAP_SIZE     _IO(KVMIO,   0x04) /* in bytes */
 
 /*
  * ioctls for VM fds
  */
-#define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 0x40, struct kvm_memory_region)
+#define KVM_SET_MEMORY_REGION      _IOW(KVMIO, 0x40, struct kvm_memory_region)
+#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x44,\
+					 struct kvm_host_memory_region)
+
 /*
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * a vcpu fd.
  */
-#define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
-#define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
-#define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
+#define KVM_CREATE_VCPU            _IO(KVMIO,  0x41)
+#define KVM_GET_DIRTY_LOG          _IOW(KVMIO, 0x42, struct kvm_dirty_log)
+#define KVM_SET_MEMORY_ALIAS       _IOW(KVMIO, 0x43, struct kvm_memory_alias)
 
 /*
  * ioctls for vcpu fds
  */
-#define KVM_RUN                   _IO(KVMIO,   0x80)
-#define KVM_GET_REGS              _IOR(KVMIO,  0x81, struct kvm_regs)
-#define KVM_SET_REGS              _IOW(KVMIO,  0x82, struct kvm_regs)
-#define KVM_GET_SREGS             _IOR(KVMIO,  0x83, struct kvm_sregs)
-#define KVM_SET_SREGS             _IOW(KVMIO,  0x84, struct kvm_sregs)
-#define KVM_TRANSLATE             _IOWR(KVMIO, 0x85, struct kvm_translation)
-#define KVM_INTERRUPT             _IOW(KVMIO,  0x86, struct kvm_interrupt)
-#define KVM_DEBUG_GUEST           _IOW(KVMIO,  0x87, struct kvm_debug_guest)
-#define KVM_GET_MSRS              _IOWR(KVMIO, 0x88, struct kvm_msrs)
-#define KVM_SET_MSRS              _IOW(KVMIO,  0x89, struct kvm_msrs)
-#define KVM_SET_CPUID             _IOW(KVMIO,  0x8a, struct kvm_cpuid)
-#define KVM_SET_SIGNAL_MASK       _IOW(KVMIO,  0x8b, struct kvm_signal_mask)
-#define KVM_GET_FPU               _IOR(KVMIO,  0x8c, struct kvm_fpu)
-#define KVM_SET_FPU               _IOW(KVMIO,  0x8d, struct kvm_fpu)
+#define KVM_RUN                    _IO(KVMIO,   0x80)
+#define KVM_GET_REGS               _IOR(KVMIO,  0x81, struct kvm_regs)
+#define KVM_SET_REGS               _IOW(KVMIO,  0x82, struct kvm_regs)
+#define KVM_GET_SREGS              _IOR(KVMIO,  0x83, struct kvm_sregs)
+#define KVM_SET_SREGS              _IOW(KVMIO,  0x84, struct kvm_sregs)
+#define KVM_TRANSLATE              _IOWR(KVMIO, 0x85, struct kvm_translation)
+#define KVM_INTERRUPT              _IOW(KVMIO,  0x86, struct kvm_interrupt)
+#define KVM_DEBUG_GUEST            _IOW(KVMIO,  0x87, struct kvm_debug_guest)
+#define KVM_GET_MSRS               _IOWR(KVMIO, 0x88, struct kvm_msrs)
+#define KVM_SET_MSRS               _IOW(KVMIO,  0x89, struct kvm_msrs)
+#define KVM_SET_CPUID              _IOW(KVMIO,  0x8a, struct kvm_cpuid)
+#define KVM_SET_SIGNAL_MASK        _IOW(KVMIO,  0x8b, struct kvm_signal_mask)
+#define KVM_GET_FPU                _IOR(KVMIO,  0x8c, struct kvm_fpu)
+#define KVM_SET_FPU                _IOW(KVMIO,  0x8d, struct kvm_fpu)
 
 #endif

[-- Attachment #4: Type: text/plain, Size: 315 bytes --]

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

[-- Attachment #5: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: (no subject)
       [not found] ` <64F9B87B6B770947A9F8391472E032160CBECF40-yEcIvxbTEBqsx+V+t5oei8rau4O3wl8o3fe8/T/H7NteoWH0uzbU5w@public.gmane.org>
@ 2007-08-12 21:37   ` Anthony Liguori
       [not found]     ` <64F9B87B6B770947A9F8391472E032160CBECF4A@ehost011-8.exch011.intermedia.net>
  2007-08-13  8:10     ` Avi Kivity
  2007-08-14 10:38   ` Carsten Otte
  1 sibling, 2 replies; 9+ messages in thread
From: Anthony Liguori @ 2007-08-12 21:37 UTC (permalink / raw)
  To: Izik Eidus; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Hi Izik,

On Sun, 2007-08-12 at 12:59 -0700, Izik Eidus wrote:
> we are working on swapping support for the guests in kvm.
> we want to allow management of the memory swapping of the guests from
> kvm.
> 
> i wrote this patch that move the guest allocated memory from the
> kernel to the userspace for better management.
> plus in this way it will share more code for such soultion with s390
> and other archs.

Do we care much about maintaining the kvmctl API?  I ask because I think
it would be nice to provide a pointer to kvmctl instead of having it
allocate the memory.  I think there are a few advantages to this.

It simplifies the QEMU patch and it allows for the logic for choosing
how memory is allocated to be done in QEMU.  I'm thinking about things
like allocating memory from hugetlbfs.  Since large pages are a sparse
commodity, I don't think we want to use large pages unless the user asks
us to.  Seems like this logic is best suited to be in QEMU to me.

Otherwise, this patch seems pretty good to me.

There's some whitespace damage in this patch too, btw.

Regards,

Anthony Liguori


> this is request for comment, so any idea you have please write to me.
> 
> thanks
> 
> 
> 
> -------------------------------------------------------------------------
> This SF.net email is sponsored by: Splunk Inc.
> Still grepping through log files to find problems?  Stop.
> Now Search log events and configuration files using AJAX and a browser.
> Download your FREE copy of Splunk now >>  http://get.splunk.com/
> _______________________________________________ kvm-devel mailing list kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org https://lists.sourceforge.net/lists/listinfo/kvm-devel


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* FW:  (no subject)
       [not found]     ` <64F9B87B6B770947A9F8391472E032160CBECF4A@ehost011-8.exch011.intermedia.net>
@ 2007-08-12 22:43       ` Izik Eidus
  0 siblings, 0 replies; 9+ messages in thread
From: Izik Eidus @ 2007-08-12 22:43 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f


[-- Attachment #1.1: Type: text/plain, Size: 2420 bytes --]




-----הודעה מקורית-----
מאת: Izik Eidus
נשלח: א 12/08/2007 15:43
אל: Anthony Liguori
נושא: RE: [kvm-devel] (no subject)
 
hey Anthony,
yes i think you are right.
passing a pointer sound smarter to me.
i dont think we should care at all about the kvmctl API, and as you said
in this way it would be possible to do the memory allocation at QEMU.

(this evil whitespaces ...)

thanks.

-----הודעה מקורית-----
מאת: Anthony Liguori [mailto:anthony-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org]
נשלח: א 12/08/2007 14:37
אל: Izik Eidus
עותק לידיעה: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
נושא: Re: [kvm-devel] (no subject)
 
Hi Izik,

On Sun, 2007-08-12 at 12:59 -0700, Izik Eidus wrote:
> we are working on swapping support for the guests in kvm.
> we want to allow management of the memory swapping of the guests from
> kvm.
> 
> i wrote this patch that move the guest allocated memory from the
> kernel to the userspace for better management.
> plus in this way it will share more code for such soultion with s390
> and other archs.

Do we care much about maintaining the kvmctl API?  I ask because I think
it would be nice to provide a pointer to kvmctl instead of having it
allocate the memory.  I think there are a few advantages to this.

It simplifies the QEMU patch and it allows for the logic for choosing
how memory is allocated to be done in QEMU.  I'm thinking about things
like allocating memory from hugetlbfs.  Since large pages are a sparse
commodity, I don't think we want to use large pages unless the user asks
us to.  Seems like this logic is best suited to be in QEMU to me.

Otherwise, this patch seems pretty good to me.

There's some whitespace damage in this patch too, btw.

Regards,

Anthony Liguori


> this is request for comment, so any idea you have please write to me.
> 
> thanks
> 
> 
> 
> -------------------------------------------------------------------------
> This SF.net email is sponsored by: Splunk Inc.
> Still grepping through log files to find problems?  Stop.
> Now Search log events and configuration files using AJAX and a browser.
> Download your FREE copy of Splunk now >>  http://get.splunk.com/
> _______________________________________________ kvm-devel mailing list kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org https://lists.sourceforge.net/lists/listinfo/kvm-devel




[-- Attachment #1.2: Type: text/html, Size: 3350 bytes --]

[-- Attachment #2: Type: text/plain, Size: 315 bytes --]

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

[-- Attachment #3: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: (no subject)
  2007-08-12 21:37   ` Anthony Liguori
       [not found]     ` <64F9B87B6B770947A9F8391472E032160CBECF4A@ehost011-8.exch011.intermedia.net>
@ 2007-08-13  8:10     ` Avi Kivity
  1 sibling, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2007-08-13  8:10 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Izik Eidus, kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Anthony Liguori wrote:
> Hi Izik,
>
> On Sun, 2007-08-12 at 12:59 -0700, Izik Eidus wrote:
>   
>> we are working on swapping support for the guests in kvm.
>> we want to allow management of the memory swapping of the guests from
>> kvm.
>>
>> i wrote this patch that move the guest allocated memory from the
>> kernel to the userspace for better management.
>> plus in this way it will share more code for such soultion with s390
>> and other archs.
>>     
>
> Do we care much about maintaining the kvmctl API?  

Not so much at this time.  We will later on (it will also be a dynamic 
library at that time).

> I ask because I think
> it would be nice to provide a pointer to kvmctl instead of having it
> allocate the memory.  I think there are a few advantages to this.
>
> It simplifies the QEMU patch and it allows for the logic for choosing
> how memory is allocated to be done in QEMU.  I'm thinking about things
> like allocating memory from hugetlbfs.  Since large pages are a sparse
> commodity, I don't think we want to use large pages unless the user asks
> us to.  Seems like this logic is best suited to be in QEMU to me.
>   

Yes, that is one of the motivations for this patch.  Indeed it is a lot 
closer than guest paging.

However this can be modified later, let's keep this patchset small for now.

-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: (no subject)
       [not found] ` <64F9B87B6B770947A9F8391472E032160CBECF40-yEcIvxbTEBqsx+V+t5oei8rau4O3wl8o3fe8/T/H7NteoWH0uzbU5w@public.gmane.org>
  2007-08-12 21:37   ` Anthony Liguori
@ 2007-08-14 10:38   ` Carsten Otte
       [not found]     ` <64F9B87B6B770947A9F8391472E032160CBECF52@ehost011-8.exch011.intermedia.net>
       [not found]     ` <46C18616.5000005-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
  1 sibling, 2 replies; 9+ messages in thread
From: Carsten Otte @ 2007-08-14 10:38 UTC (permalink / raw)
  To: Izik Eidus; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Izik Eidus wrote:
> we are working on swapping support for the guests in kvm.
> we want to allow management of the memory swapping of the guests from kvm.
This is excellent, thank you!

> this is request for comment, so any idea you have please write to me.
I ran into a few while reading the code:

+static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
+{
+	int i;
+	
+	for (i = 0; i < free->npages; ++i) {
+		if (free->phys_mem[i]) {
+			if (!PageReserved(free->phys_mem[i]))
+				SetPageDirty(free->phys_mem[i]);
+			page_cache_release(free->phys_mem[i]);
+		}
+	}
+}
I don't see why we would want to dirty a page we release in general. 
We do only need to dirty it, if the corresponding page table entry 
indicates so (dirty bit). Did I miss something?



@@ -670,7 +692,8 @@ EXPORT_SYMBOL_GPL(fx_init);
   * Discontiguous memory is allowed, mostly for framebuffers.
   */
  static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-					  struct kvm_memory_region *mem)
+					  struct kvm_memory_region *mem,
+						unsigned long guest_host_addr)
  {
  	int r;
  	gfn_t base_gfn;
@@ -748,12 +771,26 @@ raced:
  			goto out_free;

  		memset(new.phys_mem, 0, npages * sizeof(struct page *));
-		for (i = 0; i < npages; ++i) {
-			new.phys_mem[i] = alloc_page(GFP_HIGHUSER
-						     | __GFP_ZERO);
-			if (!new.phys_mem[i])
+		
+		if (guest_host_addr) {
+			unsigned long pages_num;
+			
+			new.user_alloc = 1;
+			down_read(&current->mm->mmap_sem);
+			pages_num = get_user_pages(current, current->mm, guest_host_addr,
+								npages, 1, 0, new.phys_mem, NULL);
+			up_read(&current->mm->mmap_sem);
+			if (pages_num != npages)
  				goto out_free;
-			set_page_private(new.phys_mem[i],0);
+		} else {
+			new.user_alloc = 0;
+			for (i = 0; i < npages; ++i) {
+				new.phys_mem[i] = alloc_page(GFP_HIGHUSER
+							     | __GFP_ZERO);
+				if (!new.phys_mem[i])
+					goto out_free;
+				set_page_private(new.phys_mem[i],0);
+			}
  		}
  	}
Do we intend to maintain both paths in the long run, or wait until we 
don't care about ancient userland anymore that doesn't do the 
allocation on its own?


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* FW:  (no subject)
       [not found]     ` <64F9B87B6B770947A9F8391472E032160CBECF52@ehost011-8.exch011.intermedia.net>
@ 2007-08-14 11:11       ` Izik Eidus
  0 siblings, 0 replies; 9+ messages in thread
From: Izik Eidus @ 2007-08-14 11:11 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f


[-- Attachment #1.1: Type: text/plain, Size: 2742 bytes --]

hey Carsten,
about the dirty pages, i think you are right, i will look at this!.
about the api, as far as i can tell yes, we are going to keep the api to both options.
(when i wrote it, i wrote it with thought about how not breaking the old api.)

thanks for the comments! :)

-----Original Message-----
From: Carsten Otte [mailto:cotte-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org]
Sent: Tue 8/14/2007 3:38 AM
To: Izik Eidus
Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
Subject: Re: [kvm-devel] (no subject)
 
Izik Eidus wrote:
> we are working on swapping support for the guests in kvm.
> we want to allow management of the memory swapping of the guests from kvm.
This is excellent, thank you!

> this is request for comment, so any idea you have please write to me.
I ran into a few while reading the code:

+static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
+{
+	int i;
+	
+	for (i = 0; i < free->npages; ++i) {
+		if (free->phys_mem[i]) {
+			if (!PageReserved(free->phys_mem[i]))
+				SetPageDirty(free->phys_mem[i]);
+			page_cache_release(free->phys_mem[i]);
+		}
+	}
+}
I don't see why we would want to dirty a page we release in general. 
We do only need to dirty it, if the corresponding page table entry 
indicates so (dirty bit). Did I miss something?



@@ -670,7 +692,8 @@ EXPORT_SYMBOL_GPL(fx_init);
   * Discontiguous memory is allowed, mostly for framebuffers.
   */
  static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-					  struct kvm_memory_region *mem)
+					  struct kvm_memory_region *mem,
+						unsigned long guest_host_addr)
  {
  	int r;
  	gfn_t base_gfn;
@@ -748,12 +771,26 @@ raced:
  			goto out_free;

  		memset(new.phys_mem, 0, npages * sizeof(struct page *));
-		for (i = 0; i < npages; ++i) {
-			new.phys_mem[i] = alloc_page(GFP_HIGHUSER
-						     | __GFP_ZERO);
-			if (!new.phys_mem[i])
+		
+		if (guest_host_addr) {
+			unsigned long pages_num;
+			
+			new.user_alloc = 1;
+			down_read(&current->mm->mmap_sem);
+			pages_num = get_user_pages(current, current->mm, guest_host_addr,
+								npages, 1, 0, new.phys_mem, NULL);
+			up_read(&current->mm->mmap_sem);
+			if (pages_num != npages)
  				goto out_free;
-			set_page_private(new.phys_mem[i],0);
+		} else {
+			new.user_alloc = 0;
+			for (i = 0; i < npages; ++i) {
+				new.phys_mem[i] = alloc_page(GFP_HIGHUSER
+							     | __GFP_ZERO);
+				if (!new.phys_mem[i])
+					goto out_free;
+				set_page_private(new.phys_mem[i],0);
+			}
  		}
  	}
Do we intend to maintain both paths in the long run, or wait until we 
don't care about ancient userland anymore that doesn't do the 
allocation on its own?




[-- Attachment #1.2: Type: text/html, Size: 8977 bytes --]

[-- Attachment #2: Type: text/plain, Size: 315 bytes --]

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

[-- Attachment #3: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: user-allocated memory
       [not found]     ` <46C18616.5000005-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
@ 2007-08-14 11:52       ` Avi Kivity
       [not found]         ` <46C19778.4090502-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2007-08-14 11:52 UTC (permalink / raw)
  To: carsteno-tA70FqPdS9bQT0dZR+AlfA
  Cc: Izik Eidus, kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Carsten Otte wrote:
> Izik Eidus wrote:
>   
>> we are working on swapping support for the guests in kvm.
>> we want to allow management of the memory swapping of the guests from kvm.
>>     
> This is excellent, thank you!
>
>   
>> this is request for comment, so any idea you have please write to me.
>>     
> I ran into a few while reading the code:
>
> +static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
> +{
> +	int i;
> +	
> +	for (i = 0; i < free->npages; ++i) {
> +		if (free->phys_mem[i]) {
> +			if (!PageReserved(free->phys_mem[i]))
> +				SetPageDirty(free->phys_mem[i]);
> +			page_cache_release(free->phys_mem[i]);
> +		}
> +	}
> +}
> I don't see why we would want to dirty a page we release in general. 
> We do only need to dirty it, if the corresponding page table entry 
> indicates so (dirty bit). Did I miss something?
>
>   

kvm only tracks dirty bits if requested by userspace (for live 
migration).  Checking the pte is not so easy because the pte contains 
only host addresses, not page addresses (well, we could consult the 
guest page table, but...).

-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: user-allocated memory
       [not found]         ` <46C19778.4090502-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-08-14 12:54           ` Carsten Otte
       [not found]             ` <46C1A5F4.4070006-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Carsten Otte @ 2007-08-14 12:54 UTC (permalink / raw)
  To: Avi Kivity
  Cc: carsteno-tA70FqPdS9bQT0dZR+AlfA, Izik Eidus,
	kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Avi Kivity wrote:
> kvm only tracks dirty bits if requested by userspace (for live 
> migration).  Checking the pte is not so easy because the pte contains 
> only host addresses, not page addresses (well, we could consult the 
> guest page table, but...).
As far as I understand this code, this function is used to release a 
memory page. Hopefully at this time the guest has not set up any page 
table entries to this page, because we're releasing the target page. 
When the guest migrates the dirty bit from its pte to its struct page 
(it always does this at least when removing the pte), we could 
indicate this to the hypervisor e.g. via hypercall. This way the 
hypervisor would know if the page is dirty or not, without asking the 
guest if the guest is paravirtual.
That said, for ept/npt/s390 we have the information anyway without the 
need to do above trick, due to the double dirty/reference tracking 
approach. That would also work for non-paravirtual guests in that case.

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: user-allocated memory
       [not found]             ` <46C1A5F4.4070006-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
@ 2007-08-14 13:01               ` Avi Kivity
  0 siblings, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2007-08-14 13:01 UTC (permalink / raw)
  To: carsteno-tA70FqPdS9bQT0dZR+AlfA
  Cc: Izik Eidus, kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Carsten Otte wrote:
> Avi Kivity wrote:
>> kvm only tracks dirty bits if requested by userspace (for live 
>> migration).  Checking the pte is not so easy because the pte contains 
>> only host addresses, not page addresses (well, we could consult the 
>> guest page table, but...).
> As far as I undstand this code, this function is used to release a 
> memory page. Hopefully at this time the guest has not set up any page 
> table entries to this page, because we're releasing the target page. 
> When the guest migrates the dirty bit from its pte to its struct page 
> (it always does this at least when removing the pte), we could 
> indicate this to the hypervisor e.g. via hypercall. This way the 
> hypervisor would know if the page is dirty or not, without asking the 
> guest if the guest is paravirtual.
> That said, for ept/npt/s390 we have the information anyway without the 
> need to do above trick, due to the double dirty/reference tracking 
> approach. That would also work for non-paravirtual guests in that case.

We can actually make it work for non-paravirt, non-npt as well; when 
removing the shadow pte, set the page dirty bit.  I forgot that the gfn 
is immaterial for this.


-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2007-08-14 13:01 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-08-12 19:59 (no subject) Izik Eidus
     [not found] ` <64F9B87B6B770947A9F8391472E032160CBECF40-yEcIvxbTEBqsx+V+t5oei8rau4O3wl8o3fe8/T/H7NteoWH0uzbU5w@public.gmane.org>
2007-08-12 21:37   ` Anthony Liguori
     [not found]     ` <64F9B87B6B770947A9F8391472E032160CBECF4A@ehost011-8.exch011.intermedia.net>
2007-08-12 22:43       ` FW: " Izik Eidus
2007-08-13  8:10     ` Avi Kivity
2007-08-14 10:38   ` Carsten Otte
     [not found]     ` <64F9B87B6B770947A9F8391472E032160CBECF52@ehost011-8.exch011.intermedia.net>
2007-08-14 11:11       ` FW: " Izik Eidus
     [not found]     ` <46C18616.5000005-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
2007-08-14 11:52       ` user-allocated memory Avi Kivity
     [not found]         ` <46C19778.4090502-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-08-14 12:54           ` Carsten Otte
     [not found]             ` <46C1A5F4.4070006-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
2007-08-14 13:01               ` Avi Kivity

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.