From: Marcelo Tosatti <mtosatti@redhat.com>
To: Takuya Yoshikawa <takuya.yoshikawa@gmail.com>
Cc: linux-arch@vger.kernel.org, arnd@arndb.de, kvm@vger.kernel.org,
kvm-ia64@vger.kernel.org, fernando@oss.ntt.co.jp, x86@kernel.org,
agraf@suse.de, kvm-ppc@vger.kernel.org,
linux-kernel@vger.kernel.org, yoshikawa.takuya@oss.ntt.co.jp,
linuxppc-dev@ozlabs.org, mingo@redhat.com, paulus@samba.org,
avi@redhat.com, hpa@zytor.com, tglx@linutronix.de
Subject: Re: [RFC][PATCH RFC 10/12] KVM: move dirty bitmaps to user space
Date: Tue, 11 May 2010 00:28:28 -0300 [thread overview]
Message-ID: <20100511032827.GA3458@amt.cnet> (raw)
In-Reply-To: <20100504220702.f8ba6ccc.takuya.yoshikawa@gmail.com>
On Tue, May 04, 2010 at 10:07:02PM +0900, Takuya Yoshikawa wrote:
> We move dirty bitmaps to user space.
>
> - Allocation and destruction: we use do_mmap() and do_munmap().
> The new bitmap space is twice longer than the original one and we
> use the additional space for double buffering: this makes it
> possible to update the active bitmap while letting the user space
> read the other one safely. For x86, we can also remove the vmalloc()
> in kvm_vm_ioctl_get_dirty_log().
>
> - Bitmap manipulations: we replace all functions which access dirty
> bitmaps with *_user() functions.
>
> - For ia64: moving the dirty bitmaps of memory slots does not affect
> ia64 much because it's using a different place to store dirty logs
> rather than the dirty bitmaps of memory slots: all we have to change
> are sync and get of dirty log, so we don't need set_bit_user like
> functions for ia64.
>
> Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
> Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
> CC: Avi Kivity <avi@redhat.com>
> CC: Alexander Graf <agraf@suse.de>
> ---
> arch/ia64/kvm/kvm-ia64.c | 15 +++++++++-
> arch/powerpc/kvm/book3s.c | 5 +++-
> arch/x86/kvm/x86.c | 25 ++++++++----------
> include/linux/kvm_host.h | 3 +-
> virt/kvm/kvm_main.c | 62 +++++++++++++++++++++++++++++++++++++-------
> 5 files changed, 82 insertions(+), 28 deletions(-)
>
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 17fd65c..03503e6 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -1823,11 +1823,19 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
> n = kvm_dirty_bitmap_bytes(memslot);
> base = memslot->base_gfn / BITS_PER_LONG;
>
> + r = -EFAULT;
> + if (!access_ok(VERIFY_WRITE, memslot->dirty_bitmap, n))
> + goto out;
> +
> for (i = 0; i < n/sizeof(long); ++i) {
> if (dirty_bitmap[base + i])
> memslot->is_dirty = true;
>
> - memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
> + if (__put_user(dirty_bitmap[base + i],
> + &memslot->dirty_bitmap[i])) {
> + r = -EFAULT;
> + goto out;
> + }
> dirty_bitmap[base + i] = 0;
> }
> r = 0;
> @@ -1858,7 +1866,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
> if (memslot->is_dirty) {
> kvm_flush_remote_tlbs(kvm);
> n = kvm_dirty_bitmap_bytes(memslot);
> - memset(memslot->dirty_bitmap, 0, n);
> + if (clear_user(memslot->dirty_bitmap, n)) {
> + r = -EFAULT;
> + goto out;
> + }
> memslot->is_dirty = false;
> }
> r = 0;
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 4b074f1..2a31d2f 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -1210,7 +1210,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
> kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
>
> n = kvm_dirty_bitmap_bytes(memslot);
> - memset(memslot->dirty_bitmap, 0, n);
> + if (clear_user(memslot->dirty_bitmap, n)) {
> + r = -EFAULT;
> + goto out;
> + }
> memslot->is_dirty = false;
> }
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 023c7f8..32a3d94 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2760,40 +2760,37 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
> /* If nothing is dirty, don't bother messing with page tables. */
> if (memslot->is_dirty) {
> struct kvm_memslots *slots, *old_slots;
> - unsigned long *dirty_bitmap;
> + unsigned long __user *dirty_bitmap;
> + unsigned long __user *dirty_bitmap_old;
>
> spin_lock(&kvm->mmu_lock);
> kvm_mmu_slot_remove_write_access(kvm, log->slot);
> spin_unlock(&kvm->mmu_lock);
>
> - r = -ENOMEM;
> - dirty_bitmap = vmalloc(n);
> - if (!dirty_bitmap)
> + dirty_bitmap = memslot->dirty_bitmap;
> + dirty_bitmap_old = memslot->dirty_bitmap_old;
> + r = -EFAULT;
> + if (clear_user(dirty_bitmap_old, n))
> goto out;
> - memset(dirty_bitmap, 0, n);
>
> r = -ENOMEM;
> slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> - if (!slots) {
> - vfree(dirty_bitmap);
> + if (!slots)
> goto out;
> - }
> +
> memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> - slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
> + slots->memslots[log->slot].dirty_bitmap = dirty_bitmap_old;
> + slots->memslots[log->slot].dirty_bitmap_old = dirty_bitmap;
> slots->memslots[log->slot].is_dirty = false;
>
> old_slots = kvm->memslots;
> rcu_assign_pointer(kvm->memslots, slots);
> synchronize_srcu_expedited(&kvm->srcu);
> - dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
> kfree(old_slots);
>
> r = -EFAULT;
> - if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) {
> - vfree(dirty_bitmap);
> + if (copy_in_user(log->dirty_bitmap, dirty_bitmap, n))
> goto out;
> - }
> - vfree(dirty_bitmap);
> } else {
> r = -EFAULT;
> if (clear_user(log->dirty_bitmap, n))
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 0aa6ecb..c95e2b7 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -116,7 +116,8 @@ struct kvm_memory_slot {
> unsigned long npages;
> unsigned long flags;
> unsigned long *rmap;
> - unsigned long *dirty_bitmap;
> + unsigned long __user *dirty_bitmap;
> + unsigned long __user *dirty_bitmap_old;
> bool is_dirty;
> struct {
> unsigned long rmap_pde;
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 3e3acad..ddcf65a 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -437,8 +437,20 @@ out_err_nodisable:
>
> static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
> {
> - vfree(memslot->dirty_bitmap);
> + unsigned long user_addr;
> + unsigned long n = kvm_dirty_bitmap_bytes(memslot);
> +
> + if (!memslot->dirty_bitmap)
> + return;
> +
> + user_addr = min((unsigned long)memslot->dirty_bitmap,
> + (unsigned long)memslot->dirty_bitmap_old);
> + down_write(&current->mm->mmap_sem);
> + do_munmap(current->mm, user_addr, 2 * n);
> + up_write(&current->mm->mmap_sem);
> +
> memslot->dirty_bitmap = NULL;
> + memslot->dirty_bitmap_old = NULL;
> }
>
> /*
> @@ -472,8 +484,12 @@ void kvm_free_physmem(struct kvm *kvm)
> int i;
> struct kvm_memslots *slots = kvm->memslots;
>
> - for (i = 0; i < slots->nmemslots; ++i)
> + for (i = 0; i < slots->nmemslots; ++i) {
> + /* VM process will exit: we don't unmap by ourselves. */
> + slots->memslots[i].dirty_bitmap = NULL;
> + slots->memslots[i].dirty_bitmap_old = NULL;
> kvm_free_physmem_slot(&slots->memslots[i], NULL);
> + }
>
> kfree(kvm->memslots);
> }
> @@ -527,14 +543,35 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
>
> static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
> {
> - unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(memslot);
> + int err;
> + unsigned long user_addr;
> + unsigned long n = kvm_dirty_bitmap_bytes(memslot);
>
> - memslot->dirty_bitmap = vmalloc(dirty_bytes);
> - if (!memslot->dirty_bitmap)
> - return -ENOMEM;
> + down_write(&current->mm->mmap_sem);
> + user_addr = do_mmap(NULL, 0, 2 * n,
> + PROT_READ | PROT_WRITE,
> + MAP_PRIVATE | MAP_ANONYMOUS, 0);
> + up_write(&current->mm->mmap_sem);
> +
> + if (IS_ERR((void *)user_addr)) {
> + err = PTR_ERR((void *)user_addr);
> + goto out;
> + }
> +
> + memslot->dirty_bitmap = (unsigned long __user *)user_addr;
> + memslot->dirty_bitmap_old = (unsigned long __user *)(user_addr + n);
> + if (clear_user(memslot->dirty_bitmap, 2 * n)) {
> + err = -EFAULT;
> + goto out_unmap;
> + }
>
> - memset(memslot->dirty_bitmap, 0, dirty_bytes);
> return 0;
> +out_unmap:
> + down_write(&current->mm->mmap_sem);
> + do_munmap(current->mm, user_addr, 2 * n);
> + up_write(&current->mm->mmap_sem);
> +out:
> + return err;
> }
>
> /*
> @@ -799,7 +836,7 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
> n = kvm_dirty_bitmap_bytes(memslot);
>
> r = -EFAULT;
> - if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
> + if (copy_in_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
> goto out;
>
> r = 0;
> @@ -1195,11 +1232,16 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
> gfn = unalias_gfn(kvm, gfn);
> memslot = gfn_to_memslot_unaliased(kvm, gfn);
> if (memslot && memslot->dirty_bitmap) {
> - unsigned long rel_gfn = gfn - memslot->base_gfn;
> + int nr = generic_le_bit_offset(gfn - memslot->base_gfn);
>
> - generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
> + if (kvm_set_bit_user(nr, memslot->dirty_bitmap))
> + goto out_fault;
mark_page_dirty is called with the mmu_lock spinlock held in set_spte.
Must find a way to move it outside of the spinlock section.
next prev parent reply other threads:[~2010-05-11 3:44 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-04 12:56 [RFC][PATCH 0/12] KVM, x86, ppc, asm-generic: moving dirty bitmaps to user space Takuya Yoshikawa
2010-05-04 12:58 ` [RFC][PATCH 1/12 applied today] KVM: x86: avoid unnecessary bitmap allocation when memslot is clean Takuya Yoshikawa
2010-05-04 13:00 ` [RFC][PATCH 2/12] KVM: introduce slot level dirty state management Takuya Yoshikawa
2010-05-04 13:01 ` [RFC][PATCH 3/12] KVM: introduce wrapper functions to create and destroy dirty bitmaps Takuya Yoshikawa
2010-05-04 13:02 ` [RFC][PATCH 4/12] x86: introduce copy_in_user() for 32-bit Takuya Yoshikawa
2010-05-04 13:02 ` [RFC][PATCH 5/12] x86: introduce __set_bit() like function for bitmaps in user space Takuya Yoshikawa
2010-05-04 13:03 ` [RFC][PATCH 6/12 not tested yet] PPC: introduce copy_in_user() for 32-bit Takuya Yoshikawa
2010-05-04 13:04 ` [RFC][PATCH 7/12 not tested yet] PPC: introduce __set_bit() like function for bitmaps in user space Takuya Yoshikawa
2010-05-11 16:00 ` Alexander Graf
2010-05-12 9:25 ` Takuya Yoshikawa
2010-05-04 13:05 ` [RFC][PATCH resend 8/12] asm-generic: bitops: introduce le bit offset macro Takuya Yoshikawa
2010-05-04 15:03 ` Arnd Bergmann
2010-05-04 16:08 ` Avi Kivity
2010-05-05 2:59 ` Takuya Yoshikawa
2010-05-06 13:38 ` Arnd Bergmann
2010-05-10 11:46 ` Takuya Yoshikawa
2010-05-10 12:01 ` Avi Kivity
2010-05-10 12:01 ` Arnd Bergmann
2010-05-10 12:09 ` Takuya Yoshikawa
2010-05-04 13:06 ` [RFC][PATCH 9/12] KVM: introduce a wrapper function of set_bit_user_non_atomic() Takuya Yoshikawa
2010-05-04 13:07 ` [RFC][PATCH RFC 10/12] KVM: move dirty bitmaps to user space Takuya Yoshikawa
2010-05-11 3:28 ` Marcelo Tosatti [this message]
2010-05-12 6:27 ` Takuya Yoshikawa
2010-05-04 13:08 ` [RFC][PATCH 11/12] KVM: introduce new API for getting/switching dirty bitmaps Takuya Yoshikawa
2010-05-11 3:43 ` Marcelo Tosatti
2010-05-11 5:53 ` Takuya Yoshikawa
2010-05-11 14:07 ` Marcelo Tosatti
2010-05-12 6:03 ` Takuya Yoshikawa
2010-05-04 13:11 ` [RFC][PATCH 12/12 sample] qemu-kvm: use " Takuya Yoshikawa
2010-05-10 12:06 ` [RFC][PATCH 0/12] KVM, x86, ppc, asm-generic: moving dirty bitmaps to user space Avi Kivity
2010-05-10 12:26 ` Takuya Yoshikawa
2010-05-11 10:11 ` Takuya Yoshikawa
2010-05-13 11:47 ` Avi Kivity
2010-05-17 9:06 ` Takuya Yoshikawa
2010-05-11 15:55 ` Alexander Graf
2010-05-12 9:19 ` Takuya Yoshikawa
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100511032827.GA3458@amt.cnet \
--to=mtosatti@redhat.com \
--cc=agraf@suse.de \
--cc=arnd@arndb.de \
--cc=avi@redhat.com \
--cc=fernando@oss.ntt.co.jp \
--cc=hpa@zytor.com \
--cc=kvm-ia64@vger.kernel.org \
--cc=kvm-ppc@vger.kernel.org \
--cc=kvm@vger.kernel.org \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxppc-dev@ozlabs.org \
--cc=mingo@redhat.com \
--cc=paulus@samba.org \
--cc=takuya.yoshikawa@gmail.com \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
--cc=yoshikawa.takuya@oss.ntt.co.jp \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).