From: "Cao, Lei" <Lei.Cao@stratus.com>
To: "Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>
Subject: [PATCH v3 5/5] KVM: Implement ring-based dirty memory tracking
Date: Fri, 3 Feb 2017 20:06:22 +0000
Message-ID: <CY1PR08MB199218484C74449CF8281CF4F04F0@CY1PR08MB1992.namprd08.prod.outlook.com>
In-Reply-To: 201702031949.v13JnvsG032051@dev1.sn.stratus.com

Implement ring-based dirty memory tracking.

Each vcpu gets a ring of dirty (slot, offset) pairs, and the VM gets a
global ring for pages dirtied without a vcpu context (e.g. via
mark_page_dirty()).  The rings are vmalloc'ed by the new
virt/kvm/gfn_ring.c helpers and exposed to userspace with mmap(): the
per-vcpu rings through the vcpu fd at KVM_DIRTY_LOG_PAGE_OFFSET, the
global ring through the VM fd.  The ring size in bytes is set per VM by
the enabling ioctl (handled by kvm_vm_ioctl_enable_dirty_log_ring()).

mark_page_dirty_in_slot() gains kvm and vcpu parameters so that, when
the ring is enabled, it pushes the dirtied gfn into the appropriate
ring in addition to setting the memslot dirty bitmap bit; the bitmap
doubles as a deduplication filter, so a gfn is pushed only once per
reset cycle.  When a ring reaches its soft-full limit (max_dirty_logs),
KVM_REQ_EXIT_DIRTY_LOG_FULL is raised on the vcpu (vcpu 0 stands in for
the global ring) so that userspace can harvest the entries and reset
the rings.

The reset path (kvm_vm_ioctl_reset_dirty_pages()) walks each ring from
reset_index up to the fetch_index published by userspace, coalesces
entries from the same slot into a 64-bit mask, re-enables dirty logging
for those pages through kvm_arch_mmu_enable_log_dirty_pt_masked(),
clears the corresponding dirty bitmap bits, and flushes remote TLBs if
anything was cleared.
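As an illustration, below is a minimal sketch of a userspace consumer
for one vcpu ring.  The structure layouts and the KVM_RESET_DIRTY_PAGES
ioctl number are assumptions based on the uapi introduced earlier in
this series (they are not part of this patch), and the page offset of
the mapping is the arch-defined KVM_DIRTY_LOG_PAGE_OFFSET:

#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

/* Layouts assumed from the uapi patch of this series. */
struct kvm_dirty_gfn {
	uint32_t pad;
	uint32_t slot;		/* (as_id << 16) | slot id */
	uint64_t offset;	/* gfn offset within the memslot */
};

struct kvm_dirty_list {
	struct {
		uint32_t avail_index;	/* written by the kernel */
		uint32_t fetch_index;	/* written by userspace */
	} indices;
	struct kvm_dirty_gfn dirty_gfns[];
};

/* Hypothetical ioctl number; the real one comes from the uapi patch. */
#define KVM_RESET_DIRTY_PAGES	_IO(0xAE, 0xc2)

static int harvest_vcpu_ring(int vm_fd, int vcpu_fd, size_t ring_bytes,
			     long dirty_log_page_offset)
{
	/* Matches the kernel's gfnring->size computation. */
	uint32_t nents = ring_bytes / sizeof(struct kvm_dirty_gfn);
	struct kvm_dirty_list *list;
	uint32_t fetch, avail;

	/*
	 * Per-vcpu rings live at KVM_DIRTY_LOG_PAGE_OFFSET pages into
	 * the vcpu fd; the global ring is mmap()ed from the VM fd.
	 */
	list = mmap(NULL, ring_bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
		    vcpu_fd, dirty_log_page_offset * getpagesize());
	if (list == MAP_FAILED)
		return -1;

	fetch = list->indices.fetch_index;
	avail = list->indices.avail_index;

	/*
	 * A real consumer would insert a read barrier here, pairing
	 * with the kernel's smp_wmb() on the push side.
	 */
	while (fetch != avail) {
		struct kvm_dirty_gfn *e = &list->dirty_gfns[fetch];

		printf("slot %u gfn offset %llu\n", e->slot,
		       (unsigned long long)e->offset);
		fetch = (fetch + 1) % nents;	/* indices are pre-wrapped */
	}
	list->indices.fetch_index = fetch;

	/* Ask the kernel to re-arm dirty tracking for harvested pages. */
	return ioctl(vm_fd, KVM_RESET_DIRTY_PAGES, 0);
}

Once the ioctl returns, the kernel has advanced reset_index past every
harvested entry and re-enabled dirty logging for those pages, so the
next write to the same gfn is logged again.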

Signed-off-by: Lei Cao <lei.cao@stratus.com>
---
 arch/x86/kvm/Makefile    |   3 +-
 include/linux/kvm_host.h |  14 ++++
 virt/kvm/gfn_ring.c      | 100 +++++++++++++++++++++++
 virt/kvm/kvm_main.c      | 209 ++++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 313 insertions(+), 13 deletions(-)
 create mode 100644 virt/kvm/gfn_ring.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 3bff207..d832622 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -8,7 +8,8 @@ CFLAGS_vmx.o := -I.
 KVM := ../../../virt/kvm
 
 kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
-				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
+				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o \
+				$(KVM)/gfn_ring.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 65561bf..7fd045b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -34,6 +34,7 @@
 #include <linux/kvm_types.h>
 
 #include <asm/kvm_host.h>
+#include <linux/kvm_gfn_ring.h>
 
 #ifndef KVM_MAX_VCPU_ID
 #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
@@ -265,6 +266,11 @@ struct kvm_vcpu {
 	bool preempted;
 	struct kvm_vcpu_arch arch;
 	struct dentry *debugfs_dentry;
+
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	u32 max_dirty_logs;
+	struct kvm_gfn_ring dirty_ring;
+#endif
 };
 
 static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
@@ -430,6 +436,12 @@ struct kvm {
 	struct list_head devices;
 	struct dentry *debugfs_dentry;
 	struct kvm_stat_data **debugfs_stat_data;
+
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	u32 dirty_ring_size;
+	u32 max_dirty_logs;
+	struct kvm_gfn_ring dirty_ring;
+#endif
 };
 
 #define kvm_err(fmt, ...) \
@@ -713,6 +725,8 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 					gfn_t gfn_offset,
 					unsigned long mask);
 
+void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask);
+
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				struct kvm_dirty_log *log);
 
diff --git a/virt/kvm/gfn_ring.c b/virt/kvm/gfn_ring.c
new file mode 100644
index 0000000..5d4977c
--- /dev/null
+++ b/virt/kvm/gfn_ring.c
@@ -0,0 +1,100 @@
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/vmalloc.h>
+#include <linux/kvm_gfn_ring.h>
+
+int kvm_gfn_ring_alloc(struct kvm_gfn_ring *gfnring, u32 size)
+{
+	gfnring->dirty_list = vmalloc(size);
+	if (!gfnring->dirty_list)
+		return -ENOMEM;
+	memset(gfnring->dirty_list, 0, size);
+
+	gfnring->size = size/sizeof(struct kvm_dirty_gfn);
+	gfnring->dirty_index = 0;
+	gfnring->reset_index = 0;
+	spin_lock_init(&gfnring->lock);
+
+	return 0;
+}
+
+int kvm_gfn_ring_reset(struct kvm *kvm, struct kvm_gfn_ring *gfnring)
+{
+	u32 cur_slot, next_slot;
+	u64 cur_offset, next_offset;
+	unsigned long mask = 0;
+	u32 fetch;
+	int count = 0;
+	struct kvm_dirty_list *list = gfnring->dirty_list;
+
+	fetch = READ_ONCE(list->indices.fetch_index);
+	if (fetch == gfnring->reset_index)
+		return 0;
+
+	cur_slot = READ_ONCE(list->dirty_gfns[gfnring->reset_index].slot);
+	cur_offset = READ_ONCE(list->dirty_gfns[gfnring->reset_index].offset);
+	mask = 1;
+	count++;
+	gfnring->reset_index = (gfnring->reset_index + 1) % gfnring->size;
+	while (gfnring->reset_index != fetch) {
+		next_slot =
+		  READ_ONCE(list->dirty_gfns[gfnring->reset_index].slot);
+		next_offset =
+		  READ_ONCE(list->dirty_gfns[gfnring->reset_index].offset);
+		if ((next_slot != cur_slot) ||
+		    (next_offset < cur_offset) ||
+		    ((next_offset - cur_offset) > (BITS_PER_LONG - 1))) {
+			kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+			cur_slot = next_slot;
+			cur_offset = next_offset;
+			mask = 1;
+		} else
+			mask |= (u64)1 << (next_offset - cur_offset);
+		count++;
+		gfnring->reset_index = (gfnring->reset_index + 1) %
+					gfnring->size;
+	}
+	kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+
+	return count;
+}
+
+int kvm_gfn_ring_push(struct kvm_gfn_ring *gfnring,
+		      u32 slot,
+		      u64 offset,
+		      bool locked)
+{
+	int num;
+	struct kvm_dirty_list *list = gfnring->dirty_list;
+
+	if (((gfnring->dirty_index + 1) % gfnring->size) ==
+	    gfnring->reset_index)
+		return -EBUSY;
+
+	if (locked)
+		spin_lock(&gfnring->lock);
+
+	list->dirty_gfns[gfnring->dirty_index].slot = slot;
+	list->dirty_gfns[gfnring->dirty_index].offset = offset;
+	smp_wmb();
+	gfnring->dirty_index = (gfnring->dirty_index+1) % gfnring->size;
+	num = (gfnring->dirty_index - gfnring->reset_index) % gfnring->size;
+	list->indices.avail_index = gfnring->dirty_index;
+
+	if (locked)
+		spin_unlock(&gfnring->lock);
+
+	return num;
+}
+
+struct page *kvm_gfn_ring_get_page(struct kvm_gfn_ring *ring, u32 i)
+{
+	return vmalloc_to_page((void *)ring->dirty_list+i*PAGE_SIZE);
+
+}
+
+void kvm_gfn_ring_free(struct kvm_gfn_ring *gfnring)
+{
+	if (gfnring->dirty_list)
+		vfree(gfnring->dirty_list);
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 417c0ff..569343f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -63,6 +63,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
 
+#include <linux/kvm_gfn_ring.h>
+
 /* Worst case buffer size needed for holding an integer. */
 #define ITOA_MAX_LEN 12
 
@@ -121,7 +123,16 @@ static void hardware_disable_all(void);
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 
 static void kvm_release_pfn_dirty(kvm_pfn_t pfn);
-static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+				    struct kvm_vcpu *vcpu,
+				    struct kvm_memory_slot *memslot,
+				    gfn_t gfn);
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+static void mark_page_dirty_in_ring(struct kvm *kvm,
+				    struct kvm_vcpu *vcpu,
+				    struct kvm_memory_slot *slot,
+				    gfn_t gfn);
+#endif
 
 __visible bool kvm_rebooting;
 EXPORT_SYMBOL_GPL(kvm_rebooting);
@@ -258,11 +269,34 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	kvm_vcpu_set_dy_eligible(vcpu, false);
 	vcpu->preempted = false;
 
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	if (kvm->dirty_ring_size) {
+		r = kvm_gfn_ring_alloc(&vcpu->dirty_ring,
+				       kvm->dirty_ring_size);
+		if (r) {
+			kvm->dirty_ring_size = 0;
+			goto fail_free_run;
+		}
+		vcpu->max_dirty_logs =
+			(kvm->dirty_ring_size/sizeof(struct kvm_dirty_gfn))
+			- 1 - kvm_cpu_dirty_log_size();
+	}
+#endif
+
 	r = kvm_arch_vcpu_init(vcpu);
 	if (r < 0)
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+		goto fail_free_ring;
+#else
 		goto fail_free_run;
+#endif
 	return 0;
 
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+fail_free_ring:
+	if (kvm->dirty_ring_size)
+		kvm_gfn_ring_free(&vcpu->dirty_ring);
+#endif
 fail_free_run:
 	free_page((unsigned long)vcpu->run);
 fail:
@@ -275,6 +309,10 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 	put_pid(vcpu->pid);
 	kvm_arch_vcpu_uninit(vcpu);
 	free_page((unsigned long)vcpu->run);
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	if (vcpu->kvm->dirty_ring_size)
+		kvm_gfn_ring_free(&vcpu->dirty_ring);
+#endif
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
 
@@ -726,6 +764,10 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kvm_io_bus_destroy(kvm->buses[i]);
 	kvm_coalesced_mmio_free(kvm);
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	if (kvm->dirty_ring_size)
+		kvm_gfn_ring_free(&kvm->dirty_ring);
+#endif
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #else
@@ -1861,7 +1903,8 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
 
-static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
+static int __kvm_write_guest_page(struct kvm *kvm, struct kvm_vcpu *vcpu,
+				  struct kvm_memory_slot *memslot, gfn_t gfn,
 			          const void *data, int offset, int len)
 {
 	int r;
@@ -1873,7 +1916,7 @@ static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
 	r = __copy_to_user((void __user *)addr + offset, data, len);
 	if (r)
 		return -EFAULT;
-	mark_page_dirty_in_slot(memslot, gfn);
+	mark_page_dirty_in_slot(kvm, vcpu, memslot, gfn);
 	return 0;
 }
 
@@ -1882,7 +1925,8 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
 {
 	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 
-	return __kvm_write_guest_page(slot, gfn, data, offset, len);
+	return __kvm_write_guest_page(kvm, NULL, slot, gfn, data,
+				      offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_page);
 
@@ -1891,7 +1935,8 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 {
 	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 
-	return __kvm_write_guest_page(slot, gfn, data, offset, len);
+	return __kvm_write_guest_page(vcpu->kvm, vcpu, slot, gfn, data,
+				      offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
 
@@ -1995,7 +2040,7 @@ int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *v, struct gfn_to_hva_cac
 	r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
 	if (r)
 		return -EFAULT;
-	mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
+	mark_page_dirty_in_slot(v->kvm, v, ghc->memslot, gpa >> PAGE_SHIFT);
 
 	return 0;
 }
@@ -2060,12 +2105,17 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
 
-static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot,
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+				    struct kvm_vcpu *vcpu,
+				    struct kvm_memory_slot *memslot,
 				    gfn_t gfn)
 {
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+		mark_page_dirty_in_ring(kvm, vcpu, memslot, gfn);
+#endif
 		set_bit_le(rel_gfn, memslot->dirty_bitmap);
 	}
 }
@@ -2075,7 +2125,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	memslot = gfn_to_memslot(kvm, gfn);
-	mark_page_dirty_in_slot(memslot, gfn);
+	mark_page_dirty_in_slot(kvm, NULL, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty);
 
@@ -2084,7 +2134,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
-	mark_page_dirty_in_slot(memslot, gfn);
+	mark_page_dirty_in_slot(vcpu->kvm, vcpu, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
 
@@ -2363,6 +2413,11 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
 		page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
 #endif
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	else if (vmf->pgoff >= KVM_DIRTY_LOG_PAGE_OFFSET)
+		page = kvm_gfn_ring_get_page(&vcpu->dirty_ring,
+				vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET);
+#endif
 	else
 		return kvm_arch_vcpu_fault(vcpu, vmf);
 	get_page(page);
@@ -2946,14 +3001,118 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 }
 
 #ifdef KVM_DIRTY_LOG_PAGE_OFFSET
-static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, __u32 size)
+static void mark_page_dirty_in_ring(struct kvm *kvm,
+				    struct kvm_vcpu *vcpu,
+				    struct kvm_memory_slot *slot,
+				    gfn_t gfn)
 {
-	return -EINVAL;
+	struct kvm_gfn_ring *gfnlist;
+	u32 as_id = 0;
+	u64 offset;
+	struct kvm_vcpu *exit_vcpu = vcpu;
+	int num;
+	bool locked;
+	u32 max;
+
+	if (!kvm->dirty_ring_size)
+		return;
+
+	offset = gfn - slot->base_gfn;
+
+	if (test_bit_le(offset, slot->dirty_bitmap))
+		return;
+
+	if (vcpu)
+		as_id = kvm_arch_vcpu_memslots_id(vcpu);
+
+	locked = (vcpu == NULL);
+
+	if (vcpu) {
+		gfnlist = &vcpu->dirty_ring;
+		max = vcpu->max_dirty_logs;
+	} else {
+		gfnlist = &kvm->dirty_ring;
+		max = kvm->max_dirty_logs;
+	}
+
+	num = kvm_gfn_ring_push(gfnlist, (as_id << 16)|slot->id,
+		offset, locked);
+	if (num < 0) {
+		if (vcpu)
+			WARN_ONCE(1, "vcpu %d dirty log overflow\n",
+				vcpu->vcpu_id);
+		else
+			WARN_ONCE(1, "global dirty log overflow\n");
+		return;
+	}
+
+	if (num == max) {
+		if (!exit_vcpu)
+			exit_vcpu = kvm->vcpus[0];
+		kvm_make_request(KVM_REQ_EXIT_DIRTY_LOG_FULL, exit_vcpu);
+	}
+}
+
+void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
+{
+	struct kvm_memory_slot *memslot;
+	int as_id, id;
+
+	as_id = slot >> 16;
+	id = (u16)slot;
+	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
+		return;
+
+	memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
+	if (offset >= memslot->npages)
+		return;
+
+	spin_lock(&kvm->mmu_lock);
+	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
+	spin_unlock(&kvm->mmu_lock);
+
+	while (mask) {
+		clear_bit_le(offset + __ffs(mask), memslot->dirty_bitmap);
+		mask &= mask - 1;
+	}
+}
+
+static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size)
+{
+	int r;
+
+	kvm->dirty_ring_size = size;
+	kvm->max_dirty_logs = (size/sizeof(struct kvm_dirty_gfn)) - 1;
+	r = kvm_gfn_ring_alloc(&kvm->dirty_ring, size);
+	if (r) {
+		kvm_put_kvm(kvm);
+		return r;
+	}
+	return 0;
 }
 
 static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
 {
-	return -EINVAL;
+	int i;
+	struct kvm_vcpu *vcpu;
+	int cleared = 0;
+
+	if (!kvm->dirty_ring_size)
+		return -EINVAL;
+
+	mutex_lock(&kvm->slots_lock);
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		cleared += kvm_gfn_ring_reset(kvm, &vcpu->dirty_ring);
+
+	cleared += kvm_gfn_ring_reset(kvm, &kvm->dirty_ring);
+
+	mutex_unlock(&kvm->slots_lock);
+
+	if (cleared)
+		kvm_flush_remote_tlbs(kvm);
+
+	return cleared;
 }
 #endif
 
@@ -3202,6 +3361,29 @@ static long kvm_vm_compat_ioctl(struct file *filp,
 }
 #endif
 
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kvm *kvm = vma->vm_file->private_data;
+	struct page *page;
+
+	page = kvm_gfn_ring_get_page(&kvm->dirty_ring, vmf->pgoff);
+	get_page(page);
+	vmf->page = page;
+	return 0;
+}
+
+static const struct vm_operations_struct kvm_vm_vm_ops = {
+	.fault = kvm_vm_fault,
+};
+
+static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	vma->vm_ops = &kvm_vm_vm_ops;
+	return 0;
+}
+#endif
+
 static struct file_operations kvm_vm_fops = {
 	.release        = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
@@ -3209,6 +3391,9 @@ static struct file_operations kvm_vm_fops = {
 	.compat_ioctl   = kvm_vm_compat_ioctl,
 #endif
 	.llseek		= noop_llseek,
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	.mmap           = kvm_vm_mmap,
+#endif
 };
 
 static int kvm_dev_ioctl_create_vm(unsigned long type)
-- 
2.5.0

Thread overview: 3+ messages
     [not found] <201702031949.v13JnvsG032051@dev1.sn.stratus.com>
2017-02-03 20:06 ` Cao, Lei [this message]
2017-02-04  7:01   ` [PATCH v3 5/5] KVM: Implement ring-based dirty memory tracking Paolo Bonzini
2017-02-04  7:12   ` Paolo Bonzini
