* (no subject)
@ 2007-08-12 19:59 Izik Eidus
[not found] ` <64F9B87B6B770947A9F8391472E032160CBECF40-yEcIvxbTEBqsx+V+t5oei8rau4O3wl8o3fe8/T/H7NteoWH0uzbU5w@public.gmane.org>
0 siblings, 1 reply; 9+ messages in thread
From: Izik Eidus @ 2007-08-12 19:59 UTC (permalink / raw)
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
[-- Attachment #1.1: Type: text/plain, Size: 422 bytes --]
we are working on swapping support for the guests in kvm.
we want to allow management of the memory swapping of the guests from kvm.
i wrote this patch that move the guest allocated memory from the kernel to the userspace for better management.
plus in this way it will share more code for such solution with s390 and other archs.
this is request for comment, so any idea you have please write to me.
thanks
[-- Attachment #1.2: Type: text/html, Size: 807 bytes --]
[-- Attachment #2: kvmctl_patch --]
[-- Type: application/octet-stream, Size: 3666 bytes --]
diff --git a/user/kvmctl.c b/user/kvmctl.c
index 43b374d..887cb63 100644
--- a/user/kvmctl.c
+++ b/user/kvmctl.c
@@ -239,12 +239,13 @@ int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
int fd = kvm->fd;
int zfd;
int r;
- struct kvm_memory_region low_memory = {
+
+ struct kvm_host_memory_region low_memory = {
.slot = 3,
.memory_size = memory < dosmem ? memory : dosmem,
.guest_phys_addr = 0,
};
- struct kvm_memory_region extended_memory = {
+ struct kvm_host_memory_region extended_memory = {
.slot = 0,
.memory_size = memory < exmem ? 0 : memory - exmem,
.guest_phys_addr = exmem,
@@ -259,14 +260,41 @@ int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
}
kvm->vm_fd = fd;
+ *vm_mem = mmap(NULL, memory, PROT_READ|PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ if (*vm_mem == MAP_FAILED) {
+ fprintf(stderr, "kvm_create: %s", strerror(errno));
+ return -1;
+ }
+
+ r = munmap(memory + dosmem, exmem - dosmem);
+ if (r == -1) {
+ fprintf(stderr, "kvm_create: %s", strerror(errno));
+ return -1;
+ }
+
+ memset(*vm_mem, 0, dosmem);
+ memset(*vm_mem + exmem, 0, memory - exmem);
+
/* 640K should be enough. */
- r = ioctl(fd, KVM_SET_MEMORY_REGION, &low_memory);
+ low_memory.guest_host_addr = *vm_mem;
+ if (!low_memory.guest_host_addr) {
+ fprintf(stderr, "kvm_create: %s", strerror(errno));
+ return -1;
+ }
+
+ r = ioctl(fd, KVM_SET_USER_MEMORY_REGION, &low_memory);
if (r == -1) {
fprintf(stderr, "kvm_create_memory_region: %m\n");
return -1;
}
if (extended_memory.memory_size) {
- r = ioctl(fd, KVM_SET_MEMORY_REGION, &extended_memory);
+ extended_memory.guest_host_addr = *vm_mem + exmem;
+ if (!extended_memory.guest_host_addr) {
+ fprintf(stderr, "kvm_create: %s", strerror(errno));
+ return -1;
+ }
+
+ r = ioctl(fd, KVM_SET_USER_MEMORY_REGION, &extended_memory);
if (r == -1) {
fprintf(stderr, "kvm_create_memory_region: %m\n");
return -1;
@@ -276,11 +304,6 @@ int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
kvm_memory_region_save_params(kvm, &low_memory);
kvm_memory_region_save_params(kvm, &extended_memory);
- *vm_mem = mmap(NULL, memory, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
- if (*vm_mem == MAP_FAILED) {
- fprintf(stderr, "mmap: %m\n");
- return -1;
- }
kvm->physical_memory = *vm_mem;
zfd = open("/dev/zero", O_RDONLY);
@@ -291,7 +314,7 @@ int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
r = kvm_create_vcpu(kvm, 0);
if (r < 0)
return r;
-
+
return 0;
}
@@ -302,25 +325,36 @@ void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start,
int r;
int fd = kvm->vm_fd;
int prot = PROT_READ;
- struct kvm_memory_region memory = {
+ struct kvm_host_memory_region memory = {
.slot = slot,
.memory_size = len,
.guest_phys_addr = phys_start,
.flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
};
-
- r = ioctl(fd, KVM_SET_MEMORY_REGION, &memory);
+
+ if (writable)
+ prot |= PROT_WRITE;
+
+ ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ if (ptr == MAP_FAILED) {
+ fprintf(stderr, "kvm_create_phys_mem: %s", strerror(errno));
+ return -1;
+ }
+
+ memset(ptr, 0, len);
+
+ memory.guest_host_addr = ptr;
+ if (!memory.guest_host_addr) {
+ fprintf(stderr, "kvm_create_phys_mem: %s", strerror(errno));
+ return 0;
+ }
+
+ r = ioctl(fd, KVM_SET_USER_MEMORY_REGION, &memory);
if (r == -1)
return 0;
kvm_memory_region_save_params(kvm, &memory);
- if (writable)
- prot |= PROT_WRITE;
-
- ptr = mmap(NULL, len, prot, MAP_SHARED, fd, phys_start);
- if (ptr == MAP_FAILED)
- return 0;
return ptr;
}
[-- Attachment #3: kvm_kernel_patch --]
[-- Type: application/octet-stream, Size: 8168 bytes --]
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index fc27c2f..a94714f 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -415,6 +415,7 @@ struct kvm_memory_slot {
unsigned long flags;
struct page **phys_mem;
unsigned long *dirty_bitmap;
+ int user_alloc; /* user allocated memory */
};
struct kvm {
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index bc11c2d..1e6fa58 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -37,6 +37,7 @@
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/anon_inodes.h>
+#include <linux/pagemap.h>
#include <asm/processor.h>
#include <asm/msr.h>
@@ -322,19 +323,40 @@ static int kvm_dev_open(struct inode *inode, struct file *filp)
return 0;
}
+static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
+{
+ int i;
+
+ for (i = 0; i < free->npages; ++i) {
+ if (free->phys_mem[i]) {
+ if (!PageReserved(free->phys_mem[i]))
+ SetPageDirty(free->phys_mem[i]);
+ page_cache_release(free->phys_mem[i]);
+ }
+ }
+}
+
+static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
+{
+ int i;
+
+ for (i = 0; i < free->npages; ++i)
+ if (free->phys_mem[i])
+ __free_page(free->phys_mem[i]);
+}
+
/*
* Free any memory in @free but not in @dont.
*/
static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
- int i;
-
if (!dont || free->phys_mem != dont->phys_mem)
if (free->phys_mem) {
- for (i = 0; i < free->npages; ++i)
- if (free->phys_mem[i])
- __free_page(free->phys_mem[i]);
+ if (free->user_alloc)
+ kvm_free_userspace_physmem(free);
+ else
+ kvm_free_kernel_physmem(free);
vfree(free->phys_mem);
}
@@ -670,7 +692,8 @@ EXPORT_SYMBOL_GPL(fx_init);
* Discontiguous memory is allowed, mostly for framebuffers.
*/
static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
- struct kvm_memory_region *mem)
+ struct kvm_memory_region *mem,
+ unsigned long guest_host_addr)
{
int r;
gfn_t base_gfn;
@@ -748,12 +771,26 @@ raced:
goto out_free;
memset(new.phys_mem, 0, npages * sizeof(struct page *));
- for (i = 0; i < npages; ++i) {
- new.phys_mem[i] = alloc_page(GFP_HIGHUSER
- | __GFP_ZERO);
- if (!new.phys_mem[i])
+
+ if (guest_host_addr) {
+ unsigned long pages_num;
+
+ new.user_alloc = 1;
+ down_read(&current->mm->mmap_sem);
+ pages_num = get_user_pages(current, current->mm, guest_host_addr,
+ npages, 1, 0, new.phys_mem, NULL);
+ up_read(&current->mm->mmap_sem);
+ if (pages_num != npages)
goto out_free;
- set_page_private(new.phys_mem[i],0);
+ } else {
+ new.user_alloc = 0;
+ for (i = 0; i < npages; ++i) {
+ new.phys_mem[i] = alloc_page(GFP_HIGHUSER
+ | __GFP_ZERO);
+ if (!new.phys_mem[i])
+ goto out_free;
+ set_page_private(new.phys_mem[i],0);
+ }
}
}
@@ -2772,10 +2809,32 @@ static long kvm_vm_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
goto out;
- r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem);
+ r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem, 0);
+ if (r)
+ goto out;
+ break;
+ }
+ case KVM_SET_USER_MEMORY_REGION: {
+ struct kvm_memory_region kvm_mem;
+ struct kvm_host_memory_region kvm_host_mem;
+ r = -EFAULT;
+ if (copy_from_user(&kvm_host_mem, argp, sizeof kvm_host_mem))
+ goto out;
+
+ if (!kvm_host_mem.guest_host_addr)
+ goto out;
+
+ kvm_mem.slot = kvm_host_mem.slot;
+ kvm_mem.flags = kvm_host_mem.flags;
+ kvm_mem.guest_phys_addr = kvm_host_mem.guest_phys_addr;
+ kvm_mem.memory_size = kvm_host_mem.memory_size;
+
+ r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem,
+ kvm_host_mem.guest_host_addr);
if (r)
goto out;
break;
+
}
case KVM_GET_DIRTY_LOG: {
struct kvm_dirty_log log;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 91a446f..e53e70e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -23,6 +23,15 @@ struct kvm_memory_region {
__u64 memory_size; /* bytes */
};
+/* for KVM_SET_USER_MEMORY_REGION */
+struct kvm_host_memory_region {
+ __u32 slot;
+ __u32 flags;
+ __u64 guest_phys_addr;
+ __u64 memory_size; /* bytes */
+ __u64 guest_host_addr; /* host userspace allocated memory address */
+};
+
/* for kvm_memory_region::flags */
#define KVM_MEM_LOG_DIRTY_PAGES 1UL
@@ -254,47 +263,50 @@ struct kvm_signal_mask {
/*
* ioctls for /dev/kvm fds:
*/
-#define KVM_GET_API_VERSION _IO(KVMIO, 0x00)
-#define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */
-#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list)
+#define KVM_GET_API_VERSION _IO(KVMIO, 0x00)
+#define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */
+#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list)
/*
* Check if a kvm extension is available. Argument is extension number,
* return is 1 (yes) or 0 (no, sorry).
*/
-#define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03)
+#define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03)
/*
* Get size for mmap(vcpu_fd)
*/
-#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
+#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
/*
* ioctls for VM fds
*/
-#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
+#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
+#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x44,\
+ struct kvm_host_memory_region)
+
/*
* KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
* a vcpu fd.
*/
-#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
-#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
-#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
+#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
+#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
+#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
/*
* ioctls for vcpu fds
*/
-#define KVM_RUN _IO(KVMIO, 0x80)
-#define KVM_GET_REGS _IOR(KVMIO, 0x81, struct kvm_regs)
-#define KVM_SET_REGS _IOW(KVMIO, 0x82, struct kvm_regs)
-#define KVM_GET_SREGS _IOR(KVMIO, 0x83, struct kvm_sregs)
-#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
-#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
-#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
-#define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest)
-#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
-#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
-#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
-#define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask)
-#define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu)
-#define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu)
+#define KVM_RUN _IO(KVMIO, 0x80)
+#define KVM_GET_REGS _IOR(KVMIO, 0x81, struct kvm_regs)
+#define KVM_SET_REGS _IOW(KVMIO, 0x82, struct kvm_regs)
+#define KVM_GET_SREGS _IOR(KVMIO, 0x83, struct kvm_sregs)
+#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
+#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
+#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
+#define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest)
+#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
+#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
+#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
+#define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask)
+#define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu)
+#define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu)
#endif
[-- Attachment #4: Type: text/plain, Size: 315 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
[-- Attachment #5: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: (no subject)
[not found] ` <64F9B87B6B770947A9F8391472E032160CBECF40-yEcIvxbTEBqsx+V+t5oei8rau4O3wl8o3fe8/T/H7NteoWH0uzbU5w@public.gmane.org>
@ 2007-08-12 21:37 ` Anthony Liguori
[not found] ` <64F9B87B6B770947A9F8391472E032160CBECF4A@ehost011-8.exch011.intermedia.net>
2007-08-13 8:10 ` Avi Kivity
2007-08-14 10:38 ` Carsten Otte
1 sibling, 2 replies; 9+ messages in thread
From: Anthony Liguori @ 2007-08-12 21:37 UTC (permalink / raw)
To: Izik Eidus; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Hi Izik,
On Sun, 2007-08-12 at 12:59 -0700, Izik Eidus wrote:
> we are working on swapping support for the guests in kvm.
> we want to allow management of the memory swapping of the guests from
> kvm.
>
> i wrote this patch that move the guest allocated memory from the
> kernel to the userspace for better management.
> plus in this way it will share more code for such soultion with s390
> and other archs.
Do we care much about maintaining the kvmctl API? I ask because I think
it would be nice to provide a pointer to kvmctl instead of having it
allocate the memory. I think there are a few advantages to this.
It simplifies the QEMU patch and it allows for the logic for choosing
how memory is allocated to be done in QEMU. I'm thinking about things
like allocating memory from hugetlbfs. Since large pages are a sparse
commodity, I don't think we want to use large pages unless the user asks
us to. Seems like this logic is best suited to be in QEMU to me.
Otherwise, this page seems pretty good to me.
There's some whitespace damage in this patch too btw.
Regards,
Anthony Liguori
> this is request for comment, so any idea you have please write to me.
>
> thanks
>
>
>
> -------------------------------------------------------------------------
> This SF.net email is sponsored by: Splunk Inc.
> Still grepping through log files to find problems? Stop.
> Now Search log events and configuration files using AJAX and a browser.
> Download your FREE copy of Splunk now >> http://get.splunk.com/
> _______________________________________________ kvm-devel mailing list kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org https://lists.sourceforge.net/lists/listinfo/kvm-devel
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
^ permalink raw reply [flat|nested] 9+ messages in thread
* FW: (no subject)
[not found] ` <64F9B87B6B770947A9F8391472E032160CBECF4A@ehost011-8.exch011.intermedia.net>
@ 2007-08-12 22:43 ` Izik Eidus
0 siblings, 0 replies; 9+ messages in thread
From: Izik Eidus @ 2007-08-12 22:43 UTC (permalink / raw)
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
[-- Attachment #1.1: Type: text/plain, Size: 2420 bytes --]
-----הודעה מקורית-----
מאת: Izik Eidus
נשלח: א 12/08/2007 15:43
אל: Anthony Liguori
נושא: RE: [kvm-devel] (no subject)
hey Anthony,
yes i think you are right.
passing a pointer sound smarter to me.
i dont think we should care at all about the kvmctl API, and as you said
in this way it would be possible to do the memory allocation at QEMU.
(this evil whitespaces ...)
thanks.
-----הודעה מקורית-----
מאת: Anthony Liguori [mailto:anthony-rdkfGonbjUSkNkDKm+mE6A@public.gmane.org]
נשלח: א 12/08/2007 14:37
אל: Izik Eidus
עותק לידיעה: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
נושא: Re: [kvm-devel] (no subject)
Hi Izik,
On Sun, 2007-08-12 at 12:59 -0700, Izik Eidus wrote:
> we are working on swapping support for the guests in kvm.
> we want to allow management of the memory swapping of the guests from
> kvm.
>
> i wrote this patch that move the guest allocated memory from the
> kernel to the userspace for better management.
> plus in this way it will share more code for such soultion with s390
> and other archs.
Do we care much about maintaining the kvmctl API? I ask because I think
it would be nice to provide a pointer to kvmctl instead of having it
allocate the memory. I think there are a few advantages to this.
It simplifies the QEMU patch and it allows for the logic for choosing
how memory is allocated to be done in QEMU. I'm thinking about things
like allocating memory from hugetlbfs. Since large pages are a sparse
commodity, I don't think we want to use large pages unless the user asks
us to. Seems like this logic is best suited to be in QEMU to me.
Otherwise, this page seems pretty good to me.
There's some whitespace damage in this patch to btw.
Regards,
Anthony Liguori
> this is request for comment, so any idea you have please write to me.
>
> thanks
>
>
>
> -------------------------------------------------------------------------
> This SF.net email is sponsored by: Splunk Inc.
> Still grepping through log files to find problems? Stop.
> Now Search log events and configuration files using AJAX and a browser.
> Download your FREE copy of Splunk now >> http://get.splunk.com/
> _______________________________________________ kvm-devel mailing list kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org https://lists.sourceforge.net/lists/listinfo/kvm-devel
[-- Attachment #1.2: Type: text/html, Size: 3350 bytes --]
[-- Attachment #2: Type: text/plain, Size: 315 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
[-- Attachment #3: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: (no subject)
2007-08-12 21:37 ` Anthony Liguori
[not found] ` <64F9B87B6B770947A9F8391472E032160CBECF4A@ehost011-8.exch011.intermedia.net>
@ 2007-08-13 8:10 ` Avi Kivity
1 sibling, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2007-08-13 8:10 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Izik Eidus, kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Anthony Liguori wrote:
> Hi Izik,
>
> On Sun, 2007-08-12 at 12:59 -0700, Izik Eidus wrote:
>
>> we are working on swapping support for the guests in kvm.
>> we want to allow management of the memory swapping of the guests from
>> kvm.
>>
>> i wrote this patch that move the guest allocated memory from the
>> kernel to the userspace for better management.
>> plus in this way it will share more code for such soultion with s390
>> and other archs.
>>
>
> Do we care much about maintaining the kvmctl API?
Not so much at this time. We will later on (it will also be a dynamic
library at that time).
> I ask because I think
> it would be nice to provide a pointer to kvmctl instead of having it
> allocate the memory. I think there are a few advantages to this.
>
> It simplifies the QEMU patch and it allows for the logic for choosing
> how memory is allocated to be done in QEMU. I'm thinking about things
> like allocating memory from hugetlbfs. Since large pages are a sparse
> commodity, I don't think we want to use large pages unless the user asks
> us to. Seems like this logic is best suited to be in QEMU to me.
>
Yes, that is one of the motivations for this patch. Indeed it is a lot
closer than guest paging.
However this can be modified later, let's keep this patchset small for now.
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: (no subject)
[not found] ` <64F9B87B6B770947A9F8391472E032160CBECF40-yEcIvxbTEBqsx+V+t5oei8rau4O3wl8o3fe8/T/H7NteoWH0uzbU5w@public.gmane.org>
2007-08-12 21:37 ` Anthony Liguori
@ 2007-08-14 10:38 ` Carsten Otte
[not found] ` <64F9B87B6B770947A9F8391472E032160CBECF52@ehost011-8.exch011.intermedia.net>
[not found] ` <46C18616.5000005-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
1 sibling, 2 replies; 9+ messages in thread
From: Carsten Otte @ 2007-08-14 10:38 UTC (permalink / raw)
To: Izik Eidus; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Izik Eidus wrote:
> we are working on swapping support for the guests in kvm.
> we want to allow management of the memory swapping of the guests from kvm.
This is excellent, thank you!
> this is request for comment, so any idea you have please write to me.
I ran into a few while reading the code:
+static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
+{
+ int i;
+
+ for (i = 0; i < free->npages; ++i) {
+ if (free->phys_mem[i]) {
+ if (!PageReserved(free->phys_mem[i]))
+ SetPageDirty(free->phys_mem[i]);
+ page_cache_release(free->phys_mem[i]);
+ }
+ }
+}
I don't see why we would want to dirty a page we release in general.
We do only need to dirty it, if the corresponding page table entry
indicates so (dirty bit). Did I miss something?
@@ -670,7 +692,8 @@ EXPORT_SYMBOL_GPL(fx_init);
* Discontiguous memory is allowed, mostly for framebuffers.
*/
static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
- struct kvm_memory_region *mem)
+ struct kvm_memory_region *mem,
+ unsigned long guest_host_addr)
{
int r;
gfn_t base_gfn;
@@ -748,12 +771,26 @@ raced:
goto out_free;
memset(new.phys_mem, 0, npages * sizeof(struct page *));
- for (i = 0; i < npages; ++i) {
- new.phys_mem[i] = alloc_page(GFP_HIGHUSER
- | __GFP_ZERO);
- if (!new.phys_mem[i])
+
+ if (guest_host_addr) {
+ unsigned long pages_num;
+
+ new.user_alloc = 1;
+ down_read(&current->mm->mmap_sem);
+ pages_num = get_user_pages(current, current->mm, guest_host_addr,
+ npages, 1, 0, new.phys_mem, NULL);
+ up_read(&current->mm->mmap_sem);
+ if (pages_num != npages)
goto out_free;
- set_page_private(new.phys_mem[i],0);
+ } else {
+ new.user_alloc = 0;
+ for (i = 0; i < npages; ++i) {
+ new.phys_mem[i] = alloc_page(GFP_HIGHUSER
+ | __GFP_ZERO);
+ if (!new.phys_mem[i])
+ goto out_free;
+ set_page_private(new.phys_mem[i],0);
+ }
}
}
Do we intend to maintain both paths in the long run, or wait until we
don't care about ancient userland anymore that doesn't do the
allocation on its own?
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
^ permalink raw reply [flat|nested] 9+ messages in thread
* FW: (no subject)
[not found] ` <64F9B87B6B770947A9F8391472E032160CBECF52@ehost011-8.exch011.intermedia.net>
@ 2007-08-14 11:11 ` Izik Eidus
0 siblings, 0 replies; 9+ messages in thread
From: Izik Eidus @ 2007-08-14 11:11 UTC (permalink / raw)
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
[-- Attachment #1.1: Type: text/plain, Size: 2742 bytes --]
hey Carsten,
about the dirty pages, i think you are right, i will look at this!.
about the api, as far as i can tell yes, we are going to keep the api to both options.
(when i wrote it, i wrote it with thought about how not breaking the old api.)
thanks for the comments! :)
-----Original Message-----
From: Carsten Otte [mailto:cotte-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org]
Sent: Tue 8/14/2007 3:38 AM
To: Izik Eidus
Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
Subject: Re: [kvm-devel] (no subject)
Izik Eidus wrote:
> we are working on swapping support for the guests in kvm.
> we want to allow management of the memory swapping of the guests from kvm.
This is excellent, thank you!
> this is request for comment, so any idea you have please write to me.
I ran into a few while reading the code:
+static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
+{
+ int i;
+
+ for (i = 0; i < free->npages; ++i) {
+ if (free->phys_mem[i]) {
+ if (!PageReserved(free->phys_mem[i]))
+ SetPageDirty(free->phys_mem[i]);
+ page_cache_release(free->phys_mem[i]);
+ }
+ }
+}
I don't see why we would want to dirty a page we release in general.
We do only need to dirty it, if the corresponding page table entry
indicates so (dirty bit). Did I miss something?
@@ -670,7 +692,8 @@ EXPORT_SYMBOL_GPL(fx_init);
* Discontiguous memory is allowed, mostly for framebuffers.
*/
static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
- struct kvm_memory_region *mem)
+ struct kvm_memory_region *mem,
+ unsigned long guest_host_addr)
{
int r;
gfn_t base_gfn;
@@ -748,12 +771,26 @@ raced:
goto out_free;
memset(new.phys_mem, 0, npages * sizeof(struct page *));
- for (i = 0; i < npages; ++i) {
- new.phys_mem[i] = alloc_page(GFP_HIGHUSER
- | __GFP_ZERO);
- if (!new.phys_mem[i])
+
+ if (guest_host_addr) {
+ unsigned long pages_num;
+
+ new.user_alloc = 1;
+ down_read(&current->mm->mmap_sem);
+ pages_num = get_user_pages(current, current->mm, guest_host_addr,
+ npages, 1, 0, new.phys_mem, NULL);
+ up_read(&current->mm->mmap_sem);
+ if (pages_num != npages)
goto out_free;
- set_page_private(new.phys_mem[i],0);
+ } else {
+ new.user_alloc = 0;
+ for (i = 0; i < npages; ++i) {
+ new.phys_mem[i] = alloc_page(GFP_HIGHUSER
+ | __GFP_ZERO);
+ if (!new.phys_mem[i])
+ goto out_free;
+ set_page_private(new.phys_mem[i],0);
+ }
}
}
Do we intend to maintain both paths in the long run, or wait until we
don't care about ancient userland anymore that doesn't do the
allocation on its own?
[-- Attachment #1.2: Type: text/html, Size: 8977 bytes --]
[-- Attachment #2: Type: text/plain, Size: 315 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
[-- Attachment #3: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: user-allocated memory
[not found] ` <46C18616.5000005-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
@ 2007-08-14 11:52 ` Avi Kivity
[not found] ` <46C19778.4090502-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2007-08-14 11:52 UTC (permalink / raw)
To: carsteno-tA70FqPdS9bQT0dZR+AlfA
Cc: Izik Eidus, kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Carsten Otte wrote:
> Izik Eidus wrote:
>
>> we are working on swapping support for the guests in kvm.
>> we want to allow management of the memory swapping of the guests from kvm.
>>
> This is excellent, thank you!
>
>
>> this is request for comment, so any idea you have please write to me.
>>
> I ran into a few while reading the code:
>
> +static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
> +{
> + int i;
> +
> + for (i = 0; i < free->npages; ++i) {
> + if (free->phys_mem[i]) {
> + if (!PageReserved(free->phys_mem[i]))
> + SetPageDirty(free->phys_mem[i]);
> + page_cache_release(free->phys_mem[i]);
> + }
> + }
> +}
> I don't see why we would want to dirty a page we release in general.
> We do only need to dirty it, if the corresponding page table entry
> indicates so (dirty bit). Did I miss something?
>
>
kvm only tracks dirty bits if requested by userspace (for live
migration). Checking the pte is not so easy because the pte contains
only host addresses, not page addresses (well, we could consult the
guest page table, but...).
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: user-allocated memory
[not found] ` <46C19778.4090502-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-08-14 12:54 ` Carsten Otte
[not found] ` <46C1A5F4.4070006-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 9+ messages in thread
From: Carsten Otte @ 2007-08-14 12:54 UTC (permalink / raw)
To: Avi Kivity
Cc: carsteno-tA70FqPdS9bQT0dZR+AlfA, Izik Eidus,
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Avi Kivity wrote:
> kvm only tracks dirty bits if requested by userspace (for live
> migration). Checking the pte is not so easy because the pte contains
> only host addresses, not page addresses (well, we could consult the
> guest page table, but...).
As far as I understand this code, this function is used to release a
memory page. Hopefully at this time the guest has not set up any page
table entries to this page, because we're releasing the target page.
When the guest migrates the dirty bit from its pte to its struct page
(it always does this at least when removing the pte), we could
indicate this to the hypervisor e.g. via hypercall. This way the
hypervisor would know if the page is dirty or not, without asking the
guest if the guest is paravirtual.
That said, for ept/npt/s390 we have the information anyway without the
need to do above trick, due to the double dirty/reference tracking
approach. That would also work for non-paravirtual guests in that case.
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: user-allocated memory
[not found] ` <46C1A5F4.4070006-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
@ 2007-08-14 13:01 ` Avi Kivity
0 siblings, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2007-08-14 13:01 UTC (permalink / raw)
To: carsteno-tA70FqPdS9bQT0dZR+AlfA
Cc: Izik Eidus, kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Carsten Otte wrote:
> Avi Kivity wrote:
>> kvm only tracks dirty bits if requested by userspace (for live
>> migration). Checking the pte is not so easy because the pte contains
>> only host addresses, not page addresses (well, we could consult the
>> guest page table, but...).
> As far as I undstand this code, this function is used to release a
> memory page. Hopefully at this time the guest has not set up any page
> table entries to this page, because we're releasing the target page.
> When the guest migrates the dirty bit from its pte to its struct page
> (it always does this at least when removing the pte), we could
> indicate this to the hypervisor e.g. via hypercall. This way the
> hypervisor would know if the page is dirty or not, without asking the
> guest if the guest is paravirtual.
> That said, for ept/npt/s390 we have the information anyway without the
> need to do above trick, due to the double dirty/reference tracking
> approach. That would also work for non-paravirtual guests in that case.
We can actually make it work for non-paravirt, non-npt as well; when
removing the shadow pte, set the page dirty bit. I forgot that the gfn
is immaterial for this.
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2007-08-14 13:01 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-08-12 19:59 (no subject) Izik Eidus
[not found] ` <64F9B87B6B770947A9F8391472E032160CBECF40-yEcIvxbTEBqsx+V+t5oei8rau4O3wl8o3fe8/T/H7NteoWH0uzbU5w@public.gmane.org>
2007-08-12 21:37 ` Anthony Liguori
[not found] ` <64F9B87B6B770947A9F8391472E032160CBECF4A@ehost011-8.exch011.intermedia.net>
2007-08-12 22:43 ` FW: " Izik Eidus
2007-08-13 8:10 ` Avi Kivity
2007-08-14 10:38 ` Carsten Otte
[not found] ` <64F9B87B6B770947A9F8391472E032160CBECF52@ehost011-8.exch011.intermedia.net>
2007-08-14 11:11 ` FW: " Izik Eidus
[not found] ` <46C18616.5000005-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
2007-08-14 11:52 ` user-allocated memory Avi Kivity
[not found] ` <46C19778.4090502-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-08-14 12:54 ` Carsten Otte
[not found] ` <46C1A5F4.4070006-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
2007-08-14 13:01 ` Avi Kivity
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.