linux-arm-kernel.lists.infradead.org archive mirror
* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
@ 2014-06-04 21:11 Mario Smarduch
  2014-06-05  6:55 ` Xiao Guangrong
  0 siblings, 1 reply; 17+ messages in thread
From: Mario Smarduch @ 2014-06-04 21:11 UTC (permalink / raw)
  To: linux-arm-kernel

Resending the patch: I noticed I forgot to adjust start_ipa properly in
stage2_wp_mask_range(), and then noticed that PTEs can be indexed directly.
The patch applies cleanly after 2/4, and 4/4 applies cleanly after this patch.

This patch adds support for keeping track of VM dirty pages. As the dirty page
log is retrieved, the pages that have been written are write-protected again so
that the next write is caught for the next log read.
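
For context, a minimal userspace sketch of how such a log is typically
retrieved with the KVM_GET_DIRTY_LOG ioctl; the vm_fd handle, the slot id
and the bitmap sizing below are illustrative assumptions, not part of this
patch:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdio.h>

	/* one bit per page in the slot; must be sized to match the memslot */
	static unsigned long bitmap[512];

	static void get_dirty_log(int vm_fd)
	{
		struct kvm_dirty_log log = {
			.slot = 0,		/* hypothetical slot id */
			.dirty_bitmap = bitmap,
		};

		/*
		 * On return, set bits name pages dirtied since the last call;
		 * the kernel has write protected them again for the next round.
		 */
		if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0)
			perror("KVM_GET_DIRTY_LOG");
	}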

Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
 arch/arm/include/asm/kvm_host.h |    3 ++
 arch/arm/kvm/arm.c              |    5 ---
 arch/arm/kvm/mmu.c              |   79 +++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |   86 ---------------------------------------
 virt/kvm/kvm_main.c             |   81 ++++++++++++++++++++++++++++++++++++
 5 files changed, 163 insertions(+), 91 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 59565f5..b760f9c 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -232,5 +232,8 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+	struct kvm_memory_slot *slot,
+	gfn_t gfn_offset, unsigned long mask);
 
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dfd63ac..f06fb21 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -780,11 +780,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 }
 
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
-{
-	return -EINVAL;
-}
-
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 					struct kvm_arm_device_addr *dev_addr)
 {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index e5dff85..5ede813 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -874,6 +874,85 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 	spin_unlock(&kvm->mmu_lock);
 }
 
+/**
+ * stage2_wp_mask_range() - write protect memslot pages set in mask
+ * @pmd - pointer to page table
+ * @start_ipa - the start range of mask
+ * @addr - start_ipa or start range of adjusted mask if crossing PMD range
+ * @mask - mask of dirty pages
+ *
+ * Walk mask and write protect the associated dirty pages in the memory region.
+ * If mask crosses a PMD range adjust it to next page table and return.
+ */
+static void stage2_wp_mask_range(pmd_t *pmd, phys_addr_t start_ipa,
+		phys_addr_t *addr, unsigned long *mask)
+{
+	pte_t *pte;
+	bool crosses_pmd;
+	int i = __ffs(*mask);
+
+	if (unlikely(*addr > start_ipa))
+		start_ipa = *addr - i * PAGE_SIZE;
+	pte = pte_offset_kernel(pmd, start_ipa);
+	for (*addr = start_ipa + i * PAGE_SIZE; *mask;
+		i = __ffs(*mask), *addr = start_ipa + i * PAGE_SIZE) {
+		crosses_pmd = !!((start_ipa & PMD_MASK) ^ (*addr & PMD_MASK));
+		if (unlikely(crosses_pmd)) {
+			/* Adjust mask dirty bits relative to next page table */
+			*mask >>= (PTRS_PER_PTE - pte_index(start_ipa));
+			return;
+		}
+		if (!pte_none(pte[i]))
+			kvm_set_s2pte_readonly(&pte[i]);
+		*mask &= ~(1 << i);
+	}
+}
+
+/**
+ * kvm_mmu_write_protect_pt_masked() - write protect dirty pages set in mask
+ * @kvm:        The KVM pointer
+ * @slot:       The memory slot associated with mask
+ * @gfn_offset: The gfn offset in memory slot
+ * @mask:       The mask of dirty pages at offset 'gfn_offset' in this memory
+ *              slot to be write protected
+ *
+ * Called from the dirty page logging read function to write protect bits set
+ * in mask to record future writes to these pages in the dirty page log. This
+ * function uses a simplified page table walk, given the mask can span no more
+ * than 2 PMD table ranges.
+ * 'kvm->mmu_lock' must be held to protect against concurrent modification
+ * of page tables (2nd stage fault, mmu modifiers, ...)
+ *
+ */
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	phys_addr_t start_ipa = (slot->base_gfn + gfn_offset) << PAGE_SHIFT;
+	phys_addr_t end_ipa = start_ipa + BITS_PER_LONG * PAGE_SIZE;
+	phys_addr_t addr = start_ipa;
+	pgd_t *pgdp = kvm->arch.pgd, *pgd;
+
+	do {
+		pgd = pgdp + pgd_index(addr);
+		if (pgd_present(*pgd)) {
+			pud = pud_offset(pgd, addr);
+			if (!pud_none(*pud) && !pud_huge(*pud)) {
+				pmd = pmd_offset(pud, addr);
+				if (!pmd_none(*pmd) && !kvm_pmd_huge(*pmd))
+					stage2_wp_mask_range(pmd, start_ipa,
+								&addr, &mask);
+				else
+					addr += PMD_SIZE;
+			} else
+				addr += PUD_SIZE;
+		} else
+			addr += PGDIR_SIZE;
+	} while (mask && addr < end_ipa);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c5582c3..a603ca3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3569,92 +3569,6 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 	return 0;
 }
 
-/**
- * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
- * @kvm: kvm instance
- * @log: slot id and address to which we copy the log
- *
- * We need to keep it in mind that VCPU threads can write to the bitmap
- * concurrently.  So, to avoid losing data, we keep the following order for
- * each bit:
- *
- *   1. Take a snapshot of the bit and clear it if needed.
- *   2. Write protect the corresponding page.
- *   3. Flush TLB's if needed.
- *   4. Copy the snapshot to the userspace.
- *
- * Between 2 and 3, the guest may write to the page using the remaining TLB
- * entry.  This is not a problem because the page will be reported dirty at
- * step 4 using the snapshot taken before and step 3 ensures that successive
- * writes will be logged for the next call.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
-{
-	int r;
-	struct kvm_memory_slot *memslot;
-	unsigned long n, i;
-	unsigned long *dirty_bitmap;
-	unsigned long *dirty_bitmap_buffer;
-	bool is_dirty = false;
-
-	mutex_lock(&kvm->slots_lock);
-
-	r = -EINVAL;
-	if (log->slot >= KVM_USER_MEM_SLOTS)
-		goto out;
-
-	memslot = id_to_memslot(kvm->memslots, log->slot);
-
-	dirty_bitmap = memslot->dirty_bitmap;
-	r = -ENOENT;
-	if (!dirty_bitmap)
-		goto out;
-
-	n = kvm_dirty_bitmap_bytes(memslot);
-
-	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
-	memset(dirty_bitmap_buffer, 0, n);
-
-	spin_lock(&kvm->mmu_lock);
-
-	for (i = 0; i < n / sizeof(long); i++) {
-		unsigned long mask;
-		gfn_t offset;
-
-		if (!dirty_bitmap[i])
-			continue;
-
-		is_dirty = true;
-
-		mask = xchg(&dirty_bitmap[i], 0);
-		dirty_bitmap_buffer[i] = mask;
-
-		offset = i * BITS_PER_LONG;
-		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
-	}
-
-	spin_unlock(&kvm->mmu_lock);
-
-	/* See the comments in kvm_mmu_slot_remove_write_access(). */
-	lockdep_assert_held(&kvm->slots_lock);
-
-	/*
-	 * All the TLBs can be flushed out of mmu lock, see the comments in
-	 * kvm_mmu_slot_remove_write_access().
-	 */
-	if (is_dirty)
-		kvm_flush_remote_tlbs(kvm);
-
-	r = -EFAULT;
-	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
-		goto out;
-
-	r = 0;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 			bool line_status)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ba25765..d8d5091 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -429,6 +429,87 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
 	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
 }
 
+
+/**
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
+ *
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently.  So, to avoid losing data, we keep the following order for
+ * each bit:
+ *
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Flush TLB's if needed.
+ *   4. Copy the snapshot to the userspace.
+ *
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry.  This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
+ */
+
+int __weak kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+						struct kvm_dirty_log *log)
+{
+	int r;
+	struct kvm_memory_slot *memslot;
+	unsigned long n, i;
+	unsigned long *dirty_bitmap;
+	unsigned long *dirty_bitmap_buffer;
+	bool is_dirty = false;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	memslot = id_to_memslot(kvm->memslots, log->slot);
+
+	dirty_bitmap = memslot->dirty_bitmap;
+	r = -ENOENT;
+	if (!dirty_bitmap)
+		goto out;
+
+	n = kvm_dirty_bitmap_bytes(memslot);
+
+	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+	memset(dirty_bitmap_buffer, 0, n);
+
+	spin_lock(&kvm->mmu_lock);
+
+	for (i = 0; i < n / sizeof(long); i++) {
+		unsigned long mask;
+		gfn_t offset;
+
+		if (!dirty_bitmap[i])
+			continue;
+
+		is_dirty = true;
+
+		mask = xchg(&dirty_bitmap[i], 0);
+		dirty_bitmap_buffer[i] = mask;
+
+		offset = i * BITS_PER_LONG;
+		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
+	}
+	if (is_dirty)
+		kvm_flush_remote_tlbs(kvm);
+
+	spin_unlock(&kvm->mmu_lock);
+
+	r = -EFAULT;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+		goto out;
+
+	r = 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
 #else  /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
 
 static int kvm_init_mmu_notifier(struct kvm *kvm)
-- 
1.7.9.5
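
A note on __weak, since it is what makes the generic definition above
overridable: a weak symbol survives only until link time, where any regular
definition of the same name silently replaces it, so an architecture could
still carry its own kvm_vm_ioctl_get_dirty_log(). A toy illustration of the
mechanism (not from this patch):

	/* generic.c - weak default, used only if nothing overrides it */
	int __attribute__((weak)) arch_hook(void)
	{
		return -EINVAL;
	}

	/* arch.c - a regular definition takes precedence at link time */
	int arch_hook(void)
	{
		return 0;
	}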


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-06-04 21:11 [RESEND PATCH v7 3/4] arm: dirty log write protect management support Mario Smarduch
@ 2014-06-05  6:55 ` Xiao Guangrong
  2014-06-05 19:09   ` Mario Smarduch
  0 siblings, 1 reply; 17+ messages in thread
From: Xiao Guangrong @ 2014-06-05  6:55 UTC (permalink / raw)
  To: linux-arm-kernel

On 06/05/2014 05:11 AM, Mario Smarduch wrote:

> +	spin_lock(&kvm->mmu_lock);
> +
> +	for (i = 0; i < n / sizeof(long); i++) {
> +		unsigned long mask;
> +		gfn_t offset;
> +
> +		if (!dirty_bitmap[i])
> +			continue;
> +
> +		is_dirty = true;
> +
> +		mask = xchg(&dirty_bitmap[i], 0);
> +		dirty_bitmap_buffer[i] = mask;
> +
> +		offset = i * BITS_PER_LONG;
> +		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
> +	}
> +	if (is_dirty)
> +		kvm_flush_remote_tlbs(kvm);

You moved the flush into mmu-lock. Please do not :).

See commit 198c74f43f0f5473f99967aead30ddc622804bc1
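
Concretely, the ordering that commit establishes looks roughly like this,
a sketch based on the x86 hunk deleted above (not the exact diff):

	spin_unlock(&kvm->mmu_lock);

	/*
	 * All the TLBs can be flushed out of mmu lock, see the comments in
	 * kvm_mmu_slot_remove_write_access().
	 */
	if (is_dirty)
		kvm_flush_remote_tlbs(kvm);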


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-06-05  6:55 ` Xiao Guangrong
@ 2014-06-05 19:09   ` Mario Smarduch
  2014-06-06  5:52     ` Xiao Guangrong
  0 siblings, 1 reply; 17+ messages in thread
From: Mario Smarduch @ 2014-06-05 19:09 UTC (permalink / raw)
  To: linux-arm-kernel

On 06/04/2014 11:55 PM, Xiao Guangrong wrote:
> On 06/05/2014 05:11 AM, Mario Smarduch wrote:
> 
>> +	spin_lock(&kvm->mmu_lock);
>> +
>> +	for (i = 0; i < n / sizeof(long); i++) {
>> +		unsigned long mask;
>> +		gfn_t offset;
>> +
>> +		if (!dirty_bitmap[i])
>> +			continue;
>> +
>> +		is_dirty = true;
>> +
>> +		mask = xchg(&dirty_bitmap[i], 0);
>> +		dirty_bitmap_buffer[i] = mask;
>> +
>> +		offset = i * BITS_PER_LONG;
>> +		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
>> +	}
>> +	if (is_dirty)
>> +		kvm_flush_remote_tlbs(kvm);
> 
> You moved the flush into mmu-lock. Please do not :).
> 
> See commit 198c74f43f0f5473f99967aead30ddc622804bc1
> 

Thanks for reviewing; I revised the patch to pick up your version.

Functionally there should be no impact on ARM, since the
TLB flush function there is different.
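
A rough sketch of the ARM side, assuming the __kvm_tlb_flush_vmid HYP
helper added elsewhere in this series; it uses broadcast TLB maintenance
rather than IPIs:

	void kvm_flush_remote_tlbs(struct kvm *kvm)
	{
		if (kvm)
			kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
	}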

- Mario


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-06-05 19:09   ` Mario Smarduch
@ 2014-06-06  5:52     ` Xiao Guangrong
  2014-06-06 17:36       ` Mario Smarduch
  0 siblings, 1 reply; 17+ messages in thread
From: Xiao Guangrong @ 2014-06-06  5:52 UTC (permalink / raw)
  To: linux-arm-kernel

On 06/06/2014 03:09 AM, Mario Smarduch wrote:
> On 06/04/2014 11:55 PM, Xiao Guangrong wrote:
>> On 06/05/2014 05:11 AM, Mario Smarduch wrote:
>>
>>> +	spin_lock(&kvm->mmu_lock);
>>> +
>>> +	for (i = 0; i < n / sizeof(long); i++) {
>>> +		unsigned long mask;
>>> +		gfn_t offset;
>>> +
>>> +		if (!dirty_bitmap[i])
>>> +			continue;
>>> +
>>> +		is_dirty = true;
>>> +
>>> +		mask = xchg(&dirty_bitmap[i], 0);
>>> +		dirty_bitmap_buffer[i] = mask;
>>> +
>>> +		offset = i * BITS_PER_LONG;
>>> +		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
>>> +	}
>>> +	if (is_dirty)
>>> +		kvm_flush_remote_tlbs(kvm);
>>
>> You moved the flush into mmu-lock. Please do not :).
>>
>> See commit 198c74f43f0f5473f99967aead30ddc622804bc1
>>
> 
> Thanks for reviewing, I revised to pick up your version.
> 
> Functionally there should be no impact on ARM, the
> TLB flush function is different.

Yeah, I agree with your point on ARM, but your patch moved
the function from x86 to the common code, which means
this function is reused between ARM and x86. No?


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-06-06  5:52     ` Xiao Guangrong
@ 2014-06-06 17:36       ` Mario Smarduch
  0 siblings, 0 replies; 17+ messages in thread
From: Mario Smarduch @ 2014-06-06 17:36 UTC (permalink / raw)
  To: linux-arm-kernel

On 06/05/2014 10:52 PM, Xiao Guangrong wrote:
> On 06/06/2014 03:09 AM, Mario Smarduch wrote:
>> On 06/04/2014 11:55 PM, Xiao Guangrong wrote:
>>> On 06/05/2014 05:11 AM, Mario Smarduch wrote:
>>>
>>>> +	spin_lock(&kvm->mmu_lock);
>>>> +
>>>> +	for (i = 0; i < n / sizeof(long); i++) {
>>>> +		unsigned long mask;
>>>> +		gfn_t offset;
>>>> +
>>>> +		if (!dirty_bitmap[i])
>>>> +			continue;
>>>> +
>>>> +		is_dirty = true;
>>>> +
>>>> +		mask = xchg(&dirty_bitmap[i], 0);
>>>> +		dirty_bitmap_buffer[i] = mask;
>>>> +
>>>> +		offset = i * BITS_PER_LONG;
>>>> +		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
>>>> +	}
>>>> +	if (is_dirty)
>>>> +		kvm_flush_remote_tlbs(kvm);
>>>
>>> You moved the flush into mmu-lock. Please do not :).
>>>
>>> See commit 198c74f43f0f5473f99967aead30ddc622804bc1
>>>
>>
>> Thanks for reviewing, I revised to pick up your version.
>>
>> Functionally there should be no impact on ARM, the
>> TLB flush function is different.
> 
> Yeah, I agree with your point on ARM, but your patch moved
> the function from x86 to the common code, which means
> this function is reused between ARM and x86. No?
> 

Yes, you pretty much summarized it. My point was more that
I'm glad the change had no impact on ARM :)

Thanks,
- Mario


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-07-03 15:04               ` Christoffer Dall
  2014-07-04 16:29                 ` Paolo Bonzini
@ 2014-07-17 16:17                 ` Mario Smarduch
  1 sibling, 0 replies; 17+ messages in thread
From: Mario Smarduch @ 2014-07-17 16:17 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Christoffer,
   Just back from holiday; here is a short plan for resuming work.

- move the VM TLB flush and kvm dirty log functions to generic code, using
the Kconfig approach per Paolo's comments
- update the other architectures and make sure they compile
- keep it ARMv7 for now

Get maintainers to test the branch.

In parallel, add dirty log support to ARMv8; to test it I would
add a QEMU monitor function to validate general operation.

Your thoughts?

Thanks,
  Mario

On 07/03/2014 08:04 AM, Christoffer Dall wrote:
> On Tue, Jun 17, 2014 at 06:41:52PM -0700, Mario Smarduch wrote:
>> On 06/11/2014 12:03 AM, Christoffer Dall wrote:
>>
>>>>
>>>> There is also the issue of kvm_flush_remote_tlbs(): that's also weak, and
>>>> the generic one uses IPIs. Since it's only used in mmu.c, maybe make
>>>> this one static.
>>>>
>>> So I don't see a lot of use of weak symbols in kvm_main.c (actually on
>>> kvmarm/next I don't see any), but we do want to share code when more
>>> than one architecture implements something in the exact same way, like
>>> it seems x86 and ARM is doing here for this particular function.
>>>
>>> I think the KVM scheme is usually to check for some define, like:
>>>
>>> #ifdef KVM_ARCH_HAVE_GET_DIRTY_LOG
>>> 	ret = kvm_arch_get_dirty_log(...);
>>> #else
>>> 	ret = kvm_get_dirty_log(...);
>>> #endif
>>>
>>> but Paolo may have a more informed opinion of how to deal with these.
>>>
>>> Thanks,
>>> -Christoffer
>>>
>>
>>  
>> One approach I'm trying, after looking at the code in kvm_main.c:
>> this approach applies more to selecting features, as opposed to
>> selecting generic vs. architecture-specific functions.
>>
>> 1.-------------------------------------------------
>>  - add to 'virt/kvm/Kconfig'
>> config HAVE_KVM_ARCH_TLB_FLUSH_ALL
>>        bool
>>
>> config HAVE_KVM_ARCH_DIRTY_LOG
>>        bool
>> 2.--------------------------------------------------
>> For ARM and later ARM64 add to 'arch/arm[64]/kvm/Kconfig'
>> config KVM
>>         bool "Kernel-based Virtual Machine (KVM) support"
>> ...
>> select HAVE_KVM_ARCH_TLB_FLUSH_ALL
>> ..
>>
>> Not for HAVE_KVM_ARCH_DIRTY_LOG given it's shared with x86,
>> but would need to do it for every other architecture that
>> does not share it (except initially for arm64 since it
>> will use the variant that returns -EINVAL until feature
>> is supported)
>>
>> 3------------------------------------------------------
>> In kvm_main.c would have something like
>>
>> void kvm_flush_remote_tlbs(struct kvm *kvm)
>> {
>> #ifdef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
>>         kvm_arch_flush_remote_tlbs(kvm);
>> #else
>>         long dirty_count = kvm->tlbs_dirty;
>>
>>         smp_mb();
>>         if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
>>                 ++kvm->stat.remote_tlb_flush;
>>         cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
>> #endif
>> }
>>
>> Then add a void kvm_flush_remote_tlbs(struct kvm *kvm) declaration
>> to the arm kvm_host.h, and define the function, in this case in mmu.c.
>>
>> For the dirty log function
>> int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>>                                                 struct kvm_dirty_log *log)
>> {
>> #ifdef CONFIG_HAVE_KVM_ARCH_DIRTY_LOG
>>         kvm_arch_vm_ioctl_get_dirty_log(kvm, log);
>> #else
>>         int r;
>>         struct kvm_memory_slot *memslot;
>>         unsigned long n, i;
>>         unsigned long *dirty_bitmap;
>>         unsigned long *dirty_bitmap_buffer;
>>         bool is_dirty = false;
>> 	...
>>
>> But then you have to go into every architecture and define the
>> kvm_arch_vm_...() variant.
>>
>> Is this the right way to go? Or is there a simpler way?
>>
> Hmmm, I'm really not an expert in the 'established procedures' for what
> to put in config files etc., but here's my basic take:
> 
> a) you wouldn't put a config option in Kconfig unless it's something
> that's actually configurable or some generic feature/subsystem that
> should only be enabled if hardware has certain capabilities or other
> config options enabled.
> 
> b) this seems entirely an implementation issue and not depending on
> anything users should select.
> 
> c) therefore, I think it's either a question of always having an
> arch-specific implementation that you probe for its return value or you
> have some sort of define in the header files for the
> arch/X/include/asm/kvm_host.h to control what you need.
> 
> -Christoffer
> 


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-07-04 16:29                 ` Paolo Bonzini
@ 2014-07-17 16:00                   ` Mario Smarduch
  0 siblings, 0 replies; 17+ messages in thread
From: Mario Smarduch @ 2014-07-17 16:00 UTC (permalink / raw)
  To: linux-arm-kernel

On 07/04/2014 09:29 AM, Paolo Bonzini wrote:
> Il 03/07/2014 17:04, Christoffer Dall ha scritto:
>> Hmmm, I'm really not an expert in the 'established procedures' for what
>> to put in config files etc., but here's my basic take:
>>
>> a) you wouldn't put a config option in Kconfig unless it's something
>> that's actually configurable or some generic feature/subsystem that
>> should only be enabled if hardware has certain capabilities or other
>> config options enabled.
>>
>> b) this seems entirely an implementation issue and not depending on
>> anything users should select.
> 
> Actually I think Mario's idea is just fine.  Non-user-accessible Kconfig
> symbols are used a lot to invoke an #ifdef elsewhere in the code;
> compared to this, his proposal is a bit different, but not too much.
> 
> Sometimes #defines are used, sometimes Kconfig symbols, but the idea is
> the same.
> 
> Paolo

Hi Paolo,
  thanks for your feedback. I forgot to add that I tried the #define
ARCH_HAVE_... approach, but checkpatch rejected it and insisted
on Kconfig.

Thanks,
- Mario


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-07-03 15:04               ` Christoffer Dall
@ 2014-07-04 16:29                 ` Paolo Bonzini
  2014-07-17 16:00                   ` Mario Smarduch
  2014-07-17 16:17                 ` Mario Smarduch
  1 sibling, 1 reply; 17+ messages in thread
From: Paolo Bonzini @ 2014-07-04 16:29 UTC (permalink / raw)
  To: linux-arm-kernel

Il 03/07/2014 17:04, Christoffer Dall ha scritto:
> Hmmm, I'm really not an expert in the 'established procedures' for what
> to put in config files etc., but here's my basic take:
>
> a) you wouldn't put a config option in Kconfig unless it's something
> that's actually configurable or some generic feature/subsystem that
> should only be enabled if hardware has certain capabilities or other
> config options enabled.
>
> b) this seems entirely an implementation issue and not depending on
> anything users should select.

Actually I think Mario's idea is just fine.  Non-user-accessible Kconfig
symbols are used a lot to invoke an #ifdef elsewhere in the code;
compared to this, his proposal is a bit different, but not too much.

Sometimes #defines are used, sometimes Kconfig symbols, but the idea is 
the same.

Paolo


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-06-18  1:41             ` Mario Smarduch
@ 2014-07-03 15:04               ` Christoffer Dall
  2014-07-04 16:29                 ` Paolo Bonzini
  2014-07-17 16:17                 ` Mario Smarduch
  0 siblings, 2 replies; 17+ messages in thread
From: Christoffer Dall @ 2014-07-03 15:04 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jun 17, 2014 at 06:41:52PM -0700, Mario Smarduch wrote:
> On 06/11/2014 12:03 AM, Christoffer Dall wrote:
> 
> >>
> >> There is also the issue of kvm_flush_remote_tlbs(): that's also weak, and
> >> the generic one uses IPIs. Since it's only used in mmu.c, maybe make
> >> this one static.
> >>
> > So I don't see a lot of use of weak symbols in kvm_main.c (actually on
> > kvmarm/next I don't see any), but we do want to share code when more
> > than one architecture implements something in the exact same way, like
> > it seems x86 and ARM is doing here for this particular function.
> > 
> > I think the KVM scheme is usually to check for some define, like:
> > 
> > #ifdef KVM_ARCH_HAVE_GET_DIRTY_LOG
> > 	ret = kvm_arch_get_dirty_log(...);
> > #else
> > 	ret = kvm_get_dirty_log(...);
> > #endif
> > 
> > but Paolo may have a more informed opinion of how to deal with these.
> > 
> > Thanks,
> > -Christoffer
> >
> 
>  
> One approach I'm trying, after looking at the code in kvm_main.c:
> this approach applies more to selecting features, as opposed to
> selecting generic vs. architecture-specific functions.
> 
> 1.-------------------------------------------------
>  - add to 'virt/kvm/Kconfig'
> config HAVE_KVM_ARCH_TLB_FLUSH_ALL
>        bool
> 
> config HAVE_KVM_ARCH_DIRTY_LOG
>        bool
> 2.--------------------------------------------------
> For ARM and later ARM64 add to 'arch/arm[64]/kvm/Kconfig'
> config KVM
>         bool "Kernel-based Virtual Machine (KVM) support"
> ...
> select HAVE_KVM_ARCH_TLB_FLUSH_ALL
> ..
> 
> Not for HAVE_KVM_ARCH_DIRTY_LOG given it's shared with x86,
> but would need to do it for every other architecture that
> does not share it (except initially for arm64 since it
> will use the variant that returns -EINVAL until feature
> is supported)
> 
> 3------------------------------------------------------
> In kvm_main.c would have something like
> 
> void kvm_flush_remote_tlbs(struct kvm *kvm)
> {
> #ifdef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
>         kvm_arch_flush_remote_tlbs(kvm);
> #else
>         long dirty_count = kvm->tlbs_dirty;
> 
>         smp_mb();
>         if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
>                 ++kvm->stat.remote_tlb_flush;
>         cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
> #endif
> }
> 
> Then add a void kvm_flush_remote_tlbs(struct kvm *kvm) declaration
> to the arm kvm_host.h, and define the function, in this case in mmu.c.
> 
> For the dirty log function
> int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>                                                 struct kvm_dirty_log *log)
> {
> #ifdef CONFIG_HAVE_KVM_ARCH_DIRTY_LOG
>         kvm_arch_vm_ioctl_get_dirty_log(kvm, log);
> #else
>         int r;
>         struct kvm_memory_slot *memslot;
>         unsigned long n, i;
>         unsigned long *dirty_bitmap;
>         unsigned long *dirty_bitmap_buffer;
>         bool is_dirty = false;
> 	...
> 
> But then you have to go into every architecture and define the
> kvm_arch_vm_...() variant.
> 
> Is this the right way to go? Or is there a simpler way?
> 
Hmmm, I'm really not an expert in the 'established procedures' for what
to put in config files etc., but here's my basic take:

a) you wouldn't put a config option in Kconfig unless it's something
that's actually configurable or some generic feature/subsystem that
should only be enabled if hardware has certain capabilities or other
config options enabled.

b) this seems entirely an implementation issue and not depending on
anything users should select.

c) therefore, I think it's either a question of always having an
arch-specific implementation that you probe for its return value or you
have some sort of define in the header files for the
arch/X/include/asm/kvm_host.h to control what you need.
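
(A sketch of option (c), with a made-up symbol name just for illustration:

	/* arch/arm/include/asm/kvm_host.h */
	#define KVM_ARCH_HAVE_GET_DIRTY_LOG

	/* virt/kvm/kvm_main.c */
	int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
	{
	#ifdef KVM_ARCH_HAVE_GET_DIRTY_LOG
		return kvm_arch_vm_ioctl_get_dirty_log(kvm, log);
	#else
		/* generic implementation, as in this patch */
		...
	#endif
	}
)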

-Christoffer


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-06-11  7:03           ` Christoffer Dall
  2014-06-12  3:02             ` Mario Smarduch
@ 2014-06-18  1:41             ` Mario Smarduch
  2014-07-03 15:04               ` Christoffer Dall
  1 sibling, 1 reply; 17+ messages in thread
From: Mario Smarduch @ 2014-06-18  1:41 UTC (permalink / raw)
  To: linux-arm-kernel

On 06/11/2014 12:03 AM, Christoffer Dall wrote:

>>
>> There is also the issue of kvm_flush_remote_tlbs(): that's also weak, and
>> the generic one uses IPIs. Since it's only used in mmu.c, maybe make
>> this one static.
>>
> So I don't see a lot of use of weak symbols in kvm_main.c (actually on
> kvmarm/next I don't see any), but we do want to share code when more
> than one architecture implements something in the exact same way, like
> it seems x86 and ARM is doing here for this particular function.
> 
> I think the KVM scheme is usually to check for some define, like:
> 
> #ifdef KVM_ARCH_HAVE_GET_DIRTY_LOG
> 	ret = kvm_arch_get_dirty_log(...);
> #else
> 	ret = kvm_get_dirty_log(...);
> #endif
> 
> but Paolo may have a more informed opinion of how to deal with these.
> 
> Thanks,
> -Christoffer
>

 
One approach I'm trying, after looking at the code in kvm_main.c:
this approach applies more to selecting features, as opposed to
selecting generic vs. architecture-specific functions.

1.-------------------------------------------------
 - add to 'virt/kvm/Kconfig'
config HAVE_KVM_ARCH_TLB_FLUSH_ALL
       bool

config HAVE_KVM_ARCH_DIRTY_LOG
       bool
2.--------------------------------------------------
For ARM and later ARM64 add to 'arch/arm[64]/kvm/Kconfig'
config KVM
        bool "Kernel-based Virtual Machine (KVM) support"
...
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
..

Not for HAVE_KVM_ARCH_DIRTY_LOG given it's shared with x86,
but would need to do it for every other architecture that
does not share it (except initially for arm64 since it
will use the variant that returns -EINVAL until feature
is supported)

3------------------------------------------------------
In kvm_main.c would have something like

void kvm_flush_remote_tlbs(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
        kvm_arch_flush_remote_tlbs(kvm);
#else
        long dirty_count = kvm->tlbs_dirty;

        smp_mb();
        if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.remote_tlb_flush;
        cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
#endif
}

Then add a void kvm_flush_remote_tlbs(struct kvm *kvm) declaration
to the arm kvm_host.h, and define the function, in this case in mmu.c.

For the dirty log function
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                                                struct kvm_dirty_log *log)
{
#ifdef CONFIG_HAVE_KVM_ARCH_DIRTY_LOG
        kvm_arch_vm_ioctl_get_dirty_log(kvm, log);
#else
        int r;
        struct kvm_memory_slot *memslot;
        unsigned long n, i;
        unsigned long *dirty_bitmap;
        unsigned long *dirty_bitmap_buffer;
        bool is_dirty = false;
	...

But then you have to go into every architecture and define the
kvm_arch_vm_...() variant.

Is this the right way to go? Or is there a simpler way?

Thanks,
- Mario


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-06-11  7:03           ` Christoffer Dall
@ 2014-06-12  3:02             ` Mario Smarduch
  2014-06-18  1:41             ` Mario Smarduch
  1 sibling, 0 replies; 17+ messages in thread
From: Mario Smarduch @ 2014-06-12  3:02 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Paolo,
   for ARM dirty page logging we have a couple of functions
that are generic.

- kvm_vm_ioctl_get_dirty_log - is identical to x86 version
- kvm_flush_remote_tlbs - ARM version does hardware broadcast
  it's different from the generic one in kvm_main.c

How to proceed to make these generic? Please see below
from Christoffer.

The current patch moves kvm_vm_ioctl_get_dirty_log() into kvm_main.c
and labels both it and kvm_flush_remote_tlbs weak.

Please advise.

Thanks,
- Mario


> So I don't see a lot of use of weak symbols in kvm_main.c (actually on
> kvmarm/next I don't see any), but we do want to share code when more
> than one architecture implements something in the exact same way, like
> it seems x86 and ARM is doing here for this particular function.
> 
> I think the KVM scheme is usually to check for some define, like:
> 
> #ifdef KVM_ARCH_HAVE_GET_DIRTY_LOG
> 	ret = kvm_arch_get_dirty_log(...);
> #else
> 	ret = kvm_get_dirty_log(...);
> #endif
> 
> but Paolo may have a more informed opinion of how to deal with these.
> 
> Thanks,
> -Christoffer
> 


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-06-10 18:08         ` Mario Smarduch
@ 2014-06-11  7:03           ` Christoffer Dall
  2014-06-12  3:02             ` Mario Smarduch
  2014-06-18  1:41             ` Mario Smarduch
  0 siblings, 2 replies; 17+ messages in thread
From: Christoffer Dall @ 2014-06-11  7:03 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jun 10, 2014 at 11:08:24AM -0700, Mario Smarduch wrote:
> On 06/10/2014 02:22 AM, Christoffer Dall wrote:
> > On Mon, Jun 09, 2014 at 06:47:12PM -0700, Mario Smarduch wrote:
> >> On 06/08/2014 05:05 AM, Christoffer Dall wrote:
> >>> On Fri, Jun 06, 2014 at 10:33:41AM -0700, Mario Smarduch wrote:
> >>>> kvm_vm_ioctl_get_dirty_log() is generic used by x86, ARM. x86 recent patch 
> >>>> changed this function, this patch picks up those changes, re-tested everything
> >>>> works. Applies cleanly with other patches.
> >>>>
> >>>> This patch adds support for keeping track of VM dirty pages. As dirty page log
> >>>> is retrieved, the pages that have been written are write protected again for
> >>>> next write and log read.
> >>>>
> >>>> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
> >>>> ---
> >>>>  arch/arm/include/asm/kvm_host.h |    3 ++
> >>>>  arch/arm/kvm/arm.c              |    5 ---
> >>>>  arch/arm/kvm/mmu.c              |   79 +++++++++++++++++++++++++++++++++++
> >>>>  arch/x86/kvm/x86.c              |   86 ---------------------------------------
> >>>>  virt/kvm/kvm_main.c             |   86 +++++++++++++++++++++++++++++++++++++++
> >>>>  5 files changed, 168 insertions(+), 91 deletions(-)
> >>>>
> >>>> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> >>>> index 59565f5..b760f9c 100644
> >>>> --- a/arch/arm/include/asm/kvm_host.h
> >>>> +++ b/arch/arm/include/asm/kvm_host.h
> >>>> @@ -232,5 +232,8 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
> >>>>  int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
> >>>>  
> >>>>  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
> >>>> +void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
> >>>> +	struct kvm_memory_slot *slot,
> >>>> +	gfn_t gfn_offset, unsigned long mask);
> >>>
> >>> Do all other architectures implement this function?  arm64?
> >>
> >> Besides arm, x86 but the function is not generic.
> >>>
> > 
> > you're now calling this from generic code, so all architecture must
> > implement it, and the prototype should proably be in
> > include/linux/kvm_host.h, not in the arch-specific headers.
> Ah ok.
> > 
> >>>>  
> >>>>  #endif /* __ARM_KVM_HOST_H__ */
> >>>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> >>>> index dfd63ac..f06fb21 100644
> >>>> --- a/arch/arm/kvm/arm.c
> >>>> +++ b/arch/arm/kvm/arm.c
> >>>> @@ -780,11 +780,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> >>>>  	}
> >>>>  }
> >>>>  
> >>>> -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
> >>>> -{
> >>>> -	return -EINVAL;
> >>>> -}
> >>>> -
> >>>
> >>> What about the other architectures implementing this function?
> >>
> >> Six architectures define this function. With this patch this
> >> function is generic in kvm_main.c used by x86.
> > 
> > But you're not defining it as a weak symbol (and I don't suspect that
> > you should unless other archs do this in a *very* different way), so you
> > need to either remove it from the other archs, make it a weak symbol (I
> > hope this is not the case) or do something else.
> Mistake on my part: I just cut and pasted Xiao's recent x86 upstream patch and
> didn't add the weak definition.
> 
> I looked at IA64, MIPS (two of them), and S390: somewhat similar but quite
> different implementations. They use a sync version, where the dirty bitmaps
> are maintained at the arch level and then copied to memslot->dirty_bitmap. There
> is only commonality between x86 and ARM right now; x86 uses
> memslot->dirty_bitmap directly.
> 
> Maybe this function should go back to the architecture layer; it's
> unlikely it can become generic across all architectures.
> 
> There is also the issue of kvm_flush_remote_tlbs(): that's also weak, and
> the generic one uses IPIs. Since it's only used in mmu.c, maybe make
> this one static.
> 
So I don't see a lot of use of weak symbols in kvm_main.c (actually on
kvmarm/next I don't see any), but we do want to share code when more
than one architecture implements something in the exact same way, like
it seems x86 and ARM is doing here for this particular function.

I think the KVM scheme is usually to check for some define, like:

#ifdef KVM_ARCH_HAVE_GET_DIRTY_LOG
	ret = kvm_arch_get_dirty_log(...);
#else
	ret = kvm_get_dirty_log(...);
#endif

but Paolo may have a more informed opinion of how to deal with these.

Thanks,
-Christoffer


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-06-10  9:22       ` Christoffer Dall
@ 2014-06-10 18:08         ` Mario Smarduch
  2014-06-11  7:03           ` Christoffer Dall
  0 siblings, 1 reply; 17+ messages in thread
From: Mario Smarduch @ 2014-06-10 18:08 UTC (permalink / raw)
  To: linux-arm-kernel

On 06/10/2014 02:22 AM, Christoffer Dall wrote:
> On Mon, Jun 09, 2014 at 06:47:12PM -0700, Mario Smarduch wrote:
>> On 06/08/2014 05:05 AM, Christoffer Dall wrote:
>>> On Fri, Jun 06, 2014 at 10:33:41AM -0700, Mario Smarduch wrote:
>>>> kvm_vm_ioctl_get_dirty_log() is generic used by x86, ARM. x86 recent patch 
>>>> changed this function, this patch picks up those changes, re-tested everything
>>>> works. Applies cleanly with other patches.
>>>>
>>>> This patch adds support for keeping track of VM dirty pages. As dirty page log
>>>> is retrieved, the pages that have been written are write protected again for
>>>> next write and log read.
>>>>
>>>> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
>>>> ---
>>>>  arch/arm/include/asm/kvm_host.h |    3 ++
>>>>  arch/arm/kvm/arm.c              |    5 ---
>>>>  arch/arm/kvm/mmu.c              |   79 +++++++++++++++++++++++++++++++++++
>>>>  arch/x86/kvm/x86.c              |   86 ---------------------------------------
>>>>  virt/kvm/kvm_main.c             |   86 +++++++++++++++++++++++++++++++++++++++
>>>>  5 files changed, 168 insertions(+), 91 deletions(-)
>>>>
>>>> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
>>>> index 59565f5..b760f9c 100644
>>>> --- a/arch/arm/include/asm/kvm_host.h
>>>> +++ b/arch/arm/include/asm/kvm_host.h
>>>> @@ -232,5 +232,8 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
>>>>  int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
>>>>  
>>>>  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
>>>> +void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
>>>> +	struct kvm_memory_slot *slot,
>>>> +	gfn_t gfn_offset, unsigned long mask);
>>>
>>> Do all other architectures implement this function?  arm64?
>>
>> Besides arm, x86 but the function is not generic.
>>>
> 
> you're now calling this from generic code, so all architecture must
> implement it, and the prototype should proably be in
> include/linux/kvm_host.h, not in the arch-specific headers.
Ah ok.
> 
>>>>  
>>>>  #endif /* __ARM_KVM_HOST_H__ */
>>>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>>>> index dfd63ac..f06fb21 100644
>>>> --- a/arch/arm/kvm/arm.c
>>>> +++ b/arch/arm/kvm/arm.c
>>>> @@ -780,11 +780,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>>>>  	}
>>>>  }
>>>>  
>>>> -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
>>>> -{
>>>> -	return -EINVAL;
>>>> -}
>>>> -
>>>
>>> What about the other architectures implementing this function?
>>
>> Six architectures define this function. With this patch this
>> function is generic in kvm_main.c used by x86.
> 
> But you're not defining it as a weak symbol (and I don't suspect that
> you should unless other archs do this in a *very* different way), so you
> need to either remove it from the other archs, make it a weak symbol (I
> hope this is not the case) or do something else.
Mistake on my part: I just cut and pasted Xiao's recent x86 upstream patch and
didn't add the weak definition.

I looked at IA64, MIPS (two of them), and S390: somewhat similar but quite
different implementations. They use a sync version, where the dirty bitmaps
are maintained at the arch level and then copied to memslot->dirty_bitmap. There
is only commonality between x86 and ARM right now; x86 uses
memslot->dirty_bitmap directly.

Maybe this function should go back to the architecture layer; it's
unlikely it can become generic across all architectures.

There is also the issue of kvm_flush_remote_tlbs(): that's also weak, and
the generic one uses IPIs. Since it's only used in mmu.c, maybe make
this one static.


> 
> -Christoffer
> 


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-06-10  1:47     ` Mario Smarduch
@ 2014-06-10  9:22       ` Christoffer Dall
  2014-06-10 18:08         ` Mario Smarduch
  0 siblings, 1 reply; 17+ messages in thread
From: Christoffer Dall @ 2014-06-10  9:22 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jun 09, 2014 at 06:47:12PM -0700, Mario Smarduch wrote:
> On 06/08/2014 05:05 AM, Christoffer Dall wrote:
> > On Fri, Jun 06, 2014 at 10:33:41AM -0700, Mario Smarduch wrote:
> >> kvm_vm_ioctl_get_dirty_log() is generic used by x86, ARM. x86 recent patch 
> >> changed this function, this patch picks up those changes, re-tested everything
> >> works. Applies cleanly with other patches.
> >>
> >> This patch adds support for keeping track of VM dirty pages. As dirty page log
> >> is retrieved, the pages that have been written are write protected again for
> >> next write and log read.
> >>
> >> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
> >> ---
> >>  arch/arm/include/asm/kvm_host.h |    3 ++
> >>  arch/arm/kvm/arm.c              |    5 ---
> >>  arch/arm/kvm/mmu.c              |   79 +++++++++++++++++++++++++++++++++++
> >>  arch/x86/kvm/x86.c              |   86 ---------------------------------------
> >>  virt/kvm/kvm_main.c             |   86 +++++++++++++++++++++++++++++++++++++++
> >>  5 files changed, 168 insertions(+), 91 deletions(-)
> >>
> >> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> >> index 59565f5..b760f9c 100644
> >> --- a/arch/arm/include/asm/kvm_host.h
> >> +++ b/arch/arm/include/asm/kvm_host.h
> >> @@ -232,5 +232,8 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
> >>  int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
> >>  
> >>  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
> >> +void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
> >> +	struct kvm_memory_slot *slot,
> >> +	gfn_t gfn_offset, unsigned long mask);
> > 
> > Do all other architectures implement this function?  arm64?
> 
> Besides arm, x86 but the function is not generic.
> > 

you're now calling this from generic code, so all architecture must
implement it, and the prototype should proably be in
include/linux/kvm_host.h, not in the arch-specific headers.
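
(I.e. a sketch of just the relocation:

	/* include/linux/kvm_host.h */
	void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
					     struct kvm_memory_slot *slot,
					     gfn_t gfn_offset,
					     unsigned long mask);
)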

> >>  
> >>  #endif /* __ARM_KVM_HOST_H__ */
> >> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> >> index dfd63ac..f06fb21 100644
> >> --- a/arch/arm/kvm/arm.c
> >> +++ b/arch/arm/kvm/arm.c
> >> @@ -780,11 +780,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> >>  	}
> >>  }
> >>  
> >> -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
> >> -{
> >> -	return -EINVAL;
> >> -}
> >> -
> > 
> > What about the other architectures implementing this function?
> 
> Six architectures define this function. With this patch this
> function is generic in kvm_main.c used by x86.

But you're not defining it as a weak symbol (and I don't suspect that
you should unless other archs do this in a *very* different way), so you
need to either remove it from the other archs, make it a weak symbol (I
hope this is not the case) or do something else.

-Christoffer


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-06-08 12:05   ` Christoffer Dall
@ 2014-06-10  1:47     ` Mario Smarduch
  2014-06-10  9:22       ` Christoffer Dall
  0 siblings, 1 reply; 17+ messages in thread
From: Mario Smarduch @ 2014-06-10  1:47 UTC (permalink / raw)
  To: linux-arm-kernel

On 06/08/2014 05:05 AM, Christoffer Dall wrote:
> On Fri, Jun 06, 2014 at 10:33:41AM -0700, Mario Smarduch wrote:
>> kvm_vm_ioctl_get_dirty_log() is generic used by x86, ARM. x86 recent patch 
>> changed this function, this patch picks up those changes, re-tested everything
>> works. Applies cleanly with other patches.
>>
>> This patch adds support for keeping track of VM dirty pages. As dirty page log
>> is retrieved, the pages that have been written are write protected again for
>> next write and log read.
>>
>> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
>> ---
>>  arch/arm/include/asm/kvm_host.h |    3 ++
>>  arch/arm/kvm/arm.c              |    5 ---
>>  arch/arm/kvm/mmu.c              |   79 +++++++++++++++++++++++++++++++++++
>>  arch/x86/kvm/x86.c              |   86 ---------------------------------------
>>  virt/kvm/kvm_main.c             |   86 +++++++++++++++++++++++++++++++++++++++
>>  5 files changed, 168 insertions(+), 91 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
>> index 59565f5..b760f9c 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -232,5 +232,8 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
>>  int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
>>  
>>  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
>> +void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
>> +	struct kvm_memory_slot *slot,
>> +	gfn_t gfn_offset, unsigned long mask);
> 
> Do all other architectures implement this function?  arm64?

Besides arm, x86 but the function is not generic.
> 
>>  
>>  #endif /* __ARM_KVM_HOST_H__ */
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index dfd63ac..f06fb21 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>> @@ -780,11 +780,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>>  	}
>>  }
>>  
>> -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
>> -{
>> -	return -EINVAL;
>> -}
>> -
> 
> What about the other architectures implementing this function?

Six architectures define this function. With this patch this
function is generic in kvm_main.c used by x86.
> 
>>  static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
>>  					struct kvm_arm_device_addr *dev_addr)
>>  {
>> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
>> index e5dff85..907344c 100644
>> --- a/arch/arm/kvm/mmu.c
>> +++ b/arch/arm/kvm/mmu.c
>> @@ -874,6 +874,85 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
>>  	spin_unlock(&kvm->mmu_lock);
>>  }
>>  
>> +/**
>> + * stage2_wp_mask_range() - write protect memslot pages set in mask
>> + * @pmd - pointer to page table
>> + * @start_ipa - the start range of mask
>> + * @addr - start_ipa or start range of adjusted mask if crossing PMD range
>> + * @mask - mask of dirty pages
>> + *
>> + * Walk mask and write protect the associated dirty pages in the memory region.
>> + * If mask crosses a PMD range adjust it to next page table and return.
>> + */
>> +static void stage2_wp_mask_range(pmd_t *pmd, phys_addr_t start_ipa,
>> +		phys_addr_t *addr, unsigned long *mask)
>> +{
>> +	pte_t *pte;
>> +	bool crosses_pmd;
>> +	int i = __ffs(*mask);
>> +
>> +	if (unlikely(*addr > start_ipa))
>> +		start_ipa = *addr - i * PAGE_SIZE;
> 
> huh?
> 
>> +	pte = pte_offset_kernel(pmd, start_ipa);
>> +	for (*addr = start_ipa + i * PAGE_SIZE; *mask;
>> +		i = __ffs(*mask), *addr = start_ipa + i * PAGE_SIZE) {
>> +		crosses_pmd = !!((start_ipa & PMD_MASK) ^ (*addr & PMD_MASK));
>> +		if (unlikely(crosses_pmd)) {
>> +			/* Adjust mask dirty bits relative to next page table */
>> +			*mask >>= (PTRS_PER_PTE - pte_index(start_ipa));
>> +			return;
>> +		}
>> +		if (!pte_none(pte[i]))
>> +			kvm_set_s2pte_readonly(&pte[i]);
>> +		*mask &= ~(1 << i);
> 
> This is *really* complicated, and *really* unintuitive and *really* hard
> to read!
> 
> I feel this may very likely break, and is optimizing prematurely for
> some very special case.  Can't you follow the usual scheme of traversing
> the levels one-by-one and just calculate the 'end' address based on the
> number of bits in your long, and just adjust the mask in the calling
> function each time you are about to call a lower-level function?

Agreed, I'll extend the wp_range functions; it probably makes no sense
to be optimizing at this phase.
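
A minimal sketch of that direction, assuming a stage2_wp_range(kvm, start,
end) helper factored out of kvm_mmu_wp_memory_region() as suggested
(untested):

	phys_addr_t base = (slot->base_gfn + gfn_offset) << PAGE_SHIFT;
	/* mask is known non-zero here, so __ffs()/__fls() are safe */
	phys_addr_t start = base + __ffs(mask) * PAGE_SIZE;
	phys_addr_t end = base + (__fls(mask) + 1) * PAGE_SIZE;

	stage2_wp_range(kvm, start, end);

This write protects the whole span from the first to the last set bit;
re-protecting the clean pages in between is safe, at worst costing an extra
fault on a later write.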

> 
> In fact, I think this could be trivially implemented as an extension to
> your existing wp_range functions.  On ARM you are mostly going to
> consider 32 pages, on arm64 you are mostly going to consider 64 pages,
> just calculate that range in terms of IPAs and set that as the limit for
> calling stage2_wp_pgd_range (which should be factored out into its own
> function and called from kvm_mmu_wp_memory_region).
> 
> 
> 
>> +	}
>> +}
>> +
>> +/**
>> + * kvm_mmu_write_protected_pt_masked() - write protect dirty pages set in mask
>> + * @kvm:        The KVM pointer
>> + * @slot:       The memory slot associated with mask
>> + * @gfn_offset: The gfn offset in memory slot
>> + * @mask:       The mask of dirty pages at offset 'gnf_offset' in this memory
> 
> s/gnf_offset/gfn_offset/
ok
> 
>> + *              slot to be write protected
>> + *
>> + * Called from dirty page logging read function to write protect bits set in
>> + * mask to record future writes to these pages in dirty page log. This function
>> + * uses simplified page table walk given  mask can spawn no more then 2 PMD
> 
> random double white space before mask
ok
> 
>> + * table range.
>> + * 'kvm->mmu_lock' must be  held to protect against concurrent modification
>> + * of page tables (2nd stage fault, mmu modifiers, ...)
>> + *
>> + */
>> +void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
>> +		struct kvm_memory_slot *slot,
>> +		gfn_t gfn_offset, unsigned long mask)
>> +{
>> +	pud_t *pud;
>> +	pmd_t *pmd;
>> +	phys_addr_t start_ipa = (slot->base_gfn + gfn_offset) << PAGE_SHIFT;
>> +	phys_addr_t end_ipa = start_ipa + BITS_PER_LONG * PAGE_SIZE;
>> +	phys_addr_t addr = start_ipa;
>> +	pgd_t *pgdp = kvm->arch.pgd, *pgd;
>> +
>> +	do {
>> +		pgd = pgdp + pgd_index(addr);
>> +		if (pgd_present(*pgd)) {
>> +			pud = pud_offset(pgd, addr);
>> +			if (!pud_none(*pud) && !pud_huge(*pud)) {
>> +				pmd = pmd_offset(pud, addr);
>> +				if (!pmd_none(*pmd) && !kvm_pmd_huge(*pmd))
>> +					stage2_wp_mask_range(pmd, start_ipa,
>> +								&addr, &mask);
>> +				else
>> +					addr += PMD_SIZE;
>> +			} else
>> +				addr += PUD_SIZE;
> 
> is this correct? what if your gfn_offset puts you at the last page of a
> PUD, don't you need pud_addr_end() instead?
> 
>> +		} else
>> +			addr += PGDIR_SIZE;
> 
> please use braces for both of the single-line else-clauses above when
> the main if-clause is multi-line (see Documentation/CodingStyle chapter
> 3, just before 3.1).
> 
>> +	} while (mask && addr < end_ipa);
> 
> this seems like a complicated loop condition.  It seems to me that
> either you clear all the bits in the mask you want to check or you check
> for an address range, why is there a need for both?
> 
>> +}
>> +
>>  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>  			  struct kvm_memory_slot *memslot,
>>  			  unsigned long fault_status)
> 
> [...]
> 
> Thanks,
> -Christoffer
> 


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
  2014-06-06 17:33 ` [RESEND PATCH v7 3/4] arm: dirty log write protect management support Mario Smarduch
@ 2014-06-08 12:05   ` Christoffer Dall
  2014-06-10  1:47     ` Mario Smarduch
  0 siblings, 1 reply; 17+ messages in thread
From: Christoffer Dall @ 2014-06-08 12:05 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jun 06, 2014 at 10:33:41AM -0700, Mario Smarduch wrote:
> kvm_vm_ioctl_get_dirty_log() is generic used by x86, ARM. x86 recent patch 
> changed this function, this patch picks up those changes, re-tested everything
> works. Applies cleanly with other patches.
> 
> This patch adds support for keeping track of VM dirty pages. As dirty page log
> is retrieved, the pages that have been written are write protected again for
> next write and log read.
> 
> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
> ---
>  arch/arm/include/asm/kvm_host.h |    3 ++
>  arch/arm/kvm/arm.c              |    5 ---
>  arch/arm/kvm/mmu.c              |   79 +++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.c              |   86 ---------------------------------------
>  virt/kvm/kvm_main.c             |   86 +++++++++++++++++++++++++++++++++++++++
>  5 files changed, 168 insertions(+), 91 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 59565f5..b760f9c 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -232,5 +232,8 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
>  int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
>  
>  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
> +void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
> +	struct kvm_memory_slot *slot,
> +	gfn_t gfn_offset, unsigned long mask);

Do all other architectures implement this function?  arm64?

>  
>  #endif /* __ARM_KVM_HOST_H__ */
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index dfd63ac..f06fb21 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -780,11 +780,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>  	}
>  }
>  
> -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
> -{
> -	return -EINVAL;
> -}
> -

What about the other architectures implementing this function?

>  static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
>  					struct kvm_arm_device_addr *dev_addr)
>  {
> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
> index e5dff85..907344c 100644
> --- a/arch/arm/kvm/mmu.c
> +++ b/arch/arm/kvm/mmu.c
> @@ -874,6 +874,85 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
>  	spin_unlock(&kvm->mmu_lock);
>  }
>  
> +/**
> + * stage2_wp_mask_range() - write protect memslot pages set in mask
> + * @pmd - pointer to page table
> + * @start_ipa - the start range of mask
> + * @addr - start_ipa or start range of adjusted mask if crossing PMD range
> + * @mask - mask of dirty pages
> + *
> + * Walk mask and write protect the associated dirty pages in the memory region.
> + * If mask crosses a PMD range adjust it to next page table and return.
> + */
> +static void stage2_wp_mask_range(pmd_t *pmd, phys_addr_t start_ipa,
> +		phys_addr_t *addr, unsigned long *mask)
> +{
> +	pte_t *pte;
> +	bool crosses_pmd;
> +	int i = __ffs(*mask);
> +
> +	if (unlikely(*addr > start_ipa))
> +		start_ipa = *addr - i * PAGE_SIZE;

huh?

> +	pte = pte_offset_kernel(pmd, start_ipa);
> +	for (*addr = start_ipa + i * PAGE_SIZE; *mask;
> +		i = __ffs(*mask), *addr = start_ipa + i * PAGE_SIZE) {
> +		crosses_pmd = !!((start_ipa & PMD_MASK) ^ (*addr & PMD_MASK));
> +		if (unlikely(crosses_pmd)) {
> +			/* Adjust mask dirty bits relative to next page table */
> +			*mask >>= (PTRS_PER_PTE - pte_index(start_ipa));
> +			return;
> +		}
> +		if (!pte_none(pte[i]))
> +			kvm_set_s2pte_readonly(&pte[i]);
> +		*mask &= ~(1 << i);

This is *really* complicated, and *really* unintuitive and *really* hard
to read!

I feel this may very likely break, and is optimizing prematurely for
some very special case.  Can't you follow the usual scheme of traversing
the levels one-by-one and just calculate the 'end' address based on the
number of bits in your long, and just adjust the mask in the calling
function each time you are about to call a lower-level function?

In fact, I think this could be trivially implemented as an extension to
your existing wp_range functions.  On ARM you are mostly going to
consider 32 pages, on arm64 you are mostly going to consider 64 pages,
just calculate that range in terms of IPAs and set that as the limit for
calling stage2_wp_pgd_range (which should be factored out into its own
function and called from kvm_mmu_wp_memory_region).



> +	}
> +}
> +
> +/**
> + * kvm_mmu_write_protected_pt_masked() - write protect dirty pages set in mask
> + * @kvm:        The KVM pointer
> + * @slot:       The memory slot associated with mask
> + * @gfn_offset: The gfn offset in memory slot
> + * @mask:       The mask of dirty pages at offset 'gnf_offset' in this memory

s/gnf_offset/gfn_offset/

> + *              slot to be write protected
> + *
> + * Called from dirty page logging read function to write protect bits set in
> + * mask to record future writes to these pages in dirty page log. This function
> + * uses simplified page table walk given  mask can spawn no more then 2 PMD

random double white space before mask

> + * table range.
> + * 'kvm->mmu_lock' must be  held to protect against concurrent modification
> + * of page tables (2nd stage fault, mmu modifiers, ...)
> + *
> + */
> +void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
> +		struct kvm_memory_slot *slot,
> +		gfn_t gfn_offset, unsigned long mask)
> +{
> +	pud_t *pud;
> +	pmd_t *pmd;
> +	phys_addr_t start_ipa = (slot->base_gfn + gfn_offset) << PAGE_SHIFT;
> +	phys_addr_t end_ipa = start_ipa + BITS_PER_LONG * PAGE_SIZE;
> +	phys_addr_t addr = start_ipa;
> +	pgd_t *pgdp = kvm->arch.pgd, *pgd;
> +
> +	do {
> +		pgd = pgdp + pgd_index(addr);
> +		if (pgd_present(*pgd)) {
> +			pud = pud_offset(pgd, addr);
> +			if (!pud_none(*pud) && !pud_huge(*pud)) {
> +				pmd = pmd_offset(pud, addr);
> +				if (!pmd_none(*pmd) && !kvm_pmd_huge(*pmd))
> +					stage2_wp_mask_range(pmd, start_ipa,
> +								&addr, &mask);
> +				else
> +					addr += PMD_SIZE;
> +			} else
> +				addr += PUD_SIZE;

is this correct?  What if your gfn_offset puts you at the last page of
a PUD?  Don't you need pud_addr_end() instead?
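
I.e., the usual walker idiom clamps each step to the next boundary
rather than adding the full size, so a partial first entry is handled
naturally; a minimal sketch:

	phys_addr_t next;

	do {
		next = pud_addr_end(addr, end_ipa); /* min(next boundary, end) */
		/* ... look up and handle the pud covering [addr, next) ... */
		addr = next;
	} while (addr < end_ipa);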

> +		} else
> +			addr += PGDIR_SIZE;

please use braces for both of the single-line else-clauses above when
the main if-clause is multi-line (see Documentation/CodingStyle chapter
3, just before 3.1).
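
I.e., the quoted else-clauses would become:

	} else {
		addr += PUD_SIZE;
	}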

> +	} while (mask && addr < end_ipa);

this seems like a complicated loop condition.  It seems to me that
either you clear all the bits in the mask you want to check or you
check for an address range; why is there a need for both?
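
In other words, once every bit you act on is cleared from the mask, the
mask alone can terminate the loop; a sketch of the simpler shape:

	while (mask) {
		i = __ffs(mask);
		/* ... write protect the page for bit i ... */
		mask &= ~(1UL << i);
	}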

> +}
> +
>  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  			  struct kvm_memory_slot *memslot,
>  			  unsigned long fault_status)

[...]

Thanks,
-Christoffer


* [RESEND PATCH v7 3/4] arm: dirty log write protect management support
@ 2014-06-06 17:33 ` Mario Smarduch
  2014-06-08 12:05   ` Christoffer Dall
  0 siblings, 1 reply; 17+ messages in thread
From: Mario Smarduch @ 2014-06-06 17:33 UTC (permalink / raw)
  To: linux-arm-kernel

kvm_vm_ioctl_get_dirty_log() is generic code used by both x86 and ARM. A
recent x86 patch changed this function; this patch picks up those changes,
and everything has been re-tested and works. It applies cleanly with the
other patches.

This patch adds support for keeping track of VM dirty pages. As dirty page log
is retrieved, the pages that have been written are write protected again for
next write and log read.

Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
 arch/arm/include/asm/kvm_host.h |    3 ++
 arch/arm/kvm/arm.c              |    5 ---
 arch/arm/kvm/mmu.c              |   79 +++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |   86 ---------------------------------------
 virt/kvm/kvm_main.c             |   86 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 168 insertions(+), 91 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 59565f5..b760f9c 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -232,5 +232,8 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+	struct kvm_memory_slot *slot,
+	gfn_t gfn_offset, unsigned long mask);
 
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dfd63ac..f06fb21 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -780,11 +780,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 }
 
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
-{
-	return -EINVAL;
-}
-
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 					struct kvm_arm_device_addr *dev_addr)
 {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index e5dff85..907344c 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -874,6 +874,85 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 	spin_unlock(&kvm->mmu_lock);
 }
 
+/**
+ * stage2_wp_mask_range() - write protect memslot pages set in mask
+ * @pmd - pointer to page table
+ * @start_ipa - the start range of mask
+ * @addr - start_ipa or start range of adjusted mask if crossing PMD range
+ * @mask - mask of dirty pages
+ *
+ * Walk mask and write protect the associated dirty pages in the memory region.
+ * If the mask crosses a PMD range, adjust it to the next page table and return.
+ */
+static void stage2_wp_mask_range(pmd_t *pmd, phys_addr_t start_ipa,
+		phys_addr_t *addr, unsigned long *mask)
+{
+	pte_t *pte;
+	bool crosses_pmd;
+	int i = __ffs(*mask);
+
+	if (unlikely(*addr > start_ipa))
+		start_ipa = *addr - i * PAGE_SIZE;
+	pte = pte_offset_kernel(pmd, start_ipa);
+	for (*addr = start_ipa + i * PAGE_SIZE; *mask;
+		i = __ffs(*mask), *addr = start_ipa + i * PAGE_SIZE) {
+		crosses_pmd = !!((start_ipa & PMD_MASK) ^ (*addr & PMD_MASK));
+		if (unlikely(crosses_pmd)) {
+			/* Adjust mask dirty bits relative to next page table */
+			*mask >>= (PTRS_PER_PTE - pte_index(start_ipa));
+			return;
+		}
+		if (!pte_none(pte[i]))
+			kvm_set_s2pte_readonly(&pte[i]);
+		*mask &= ~(1 << i);
+	}
+}
+
+/**
+ * kvm_mmu_write_protect_pt_masked() - write protect dirty pages set in mask
+ * @kvm:        The KVM pointer
+ * @slot:       The memory slot associated with mask
+ * @gfn_offset: The gfn offset in memory slot
+ * @mask:       The mask of dirty pages at offset 'gfn_offset' in this memory
+ *              slot to be write protected
+ *
+ * Called from the dirty page logging read function to write protect bits set
+ * in mask, so that future writes to these pages are recorded in the dirty
+ * page log. This function uses a simplified page table walk, given that the
+ * mask can span no more than 2 PMD table ranges.
+ * 'kvm->mmu_lock' must be held to protect against concurrent modification
+ * of page tables (2nd stage fault, mmu modifiers, ...)
+ *
+ */
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+		struct kvm_memory_slot *slot,
+		gfn_t gfn_offset, unsigned long mask)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	phys_addr_t start_ipa = (slot->base_gfn + gfn_offset) << PAGE_SHIFT;
+	phys_addr_t end_ipa = start_ipa + BITS_PER_LONG * PAGE_SIZE;
+	phys_addr_t addr = start_ipa;
+	pgd_t *pgdp = kvm->arch.pgd, *pgd;
+
+	do {
+		pgd = pgdp + pgd_index(addr);
+		if (pgd_present(*pgd)) {
+			pud = pud_offset(pgd, addr);
+			if (!pud_none(*pud) && !pud_huge(*pud)) {
+				pmd = pmd_offset(pud, addr);
+				if (!pmd_none(*pmd) && !kvm_pmd_huge(*pmd))
+					stage2_wp_mask_range(pmd, start_ipa,
+								&addr, &mask);
+				else
+					addr += PMD_SIZE;
+			} else
+				addr += PUD_SIZE;
+		} else
+			addr += PGDIR_SIZE;
+	} while (mask && addr < end_ipa);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c5582c3..a603ca3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3569,92 +3569,6 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 	return 0;
 }
 
-/**
- * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
- * @kvm: kvm instance
- * @log: slot id and address to which we copy the log
- *
- * We need to keep it in mind that VCPU threads can write to the bitmap
- * concurrently.  So, to avoid losing data, we keep the following order for
- * each bit:
- *
- *   1. Take a snapshot of the bit and clear it if needed.
- *   2. Write protect the corresponding page.
- *   3. Flush TLB's if needed.
- *   4. Copy the snapshot to the userspace.
- *
- * Between 2 and 3, the guest may write to the page using the remaining TLB
- * entry.  This is not a problem because the page will be reported dirty at
- * step 4 using the snapshot taken before and step 3 ensures that successive
- * writes will be logged for the next call.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
-{
-	int r;
-	struct kvm_memory_slot *memslot;
-	unsigned long n, i;
-	unsigned long *dirty_bitmap;
-	unsigned long *dirty_bitmap_buffer;
-	bool is_dirty = false;
-
-	mutex_lock(&kvm->slots_lock);
-
-	r = -EINVAL;
-	if (log->slot >= KVM_USER_MEM_SLOTS)
-		goto out;
-
-	memslot = id_to_memslot(kvm->memslots, log->slot);
-
-	dirty_bitmap = memslot->dirty_bitmap;
-	r = -ENOENT;
-	if (!dirty_bitmap)
-		goto out;
-
-	n = kvm_dirty_bitmap_bytes(memslot);
-
-	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
-	memset(dirty_bitmap_buffer, 0, n);
-
-	spin_lock(&kvm->mmu_lock);
-
-	for (i = 0; i < n / sizeof(long); i++) {
-		unsigned long mask;
-		gfn_t offset;
-
-		if (!dirty_bitmap[i])
-			continue;
-
-		is_dirty = true;
-
-		mask = xchg(&dirty_bitmap[i], 0);
-		dirty_bitmap_buffer[i] = mask;
-
-		offset = i * BITS_PER_LONG;
-		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
-	}
-
-	spin_unlock(&kvm->mmu_lock);
-
-	/* See the comments in kvm_mmu_slot_remove_write_access(). */
-	lockdep_assert_held(&kvm->slots_lock);
-
-	/*
-	 * All the TLBs can be flushed out of mmu lock, see the comments in
-	 * kvm_mmu_slot_remove_write_access().
-	 */
-	if (is_dirty)
-		kvm_flush_remote_tlbs(kvm);
-
-	r = -EFAULT;
-	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
-		goto out;
-
-	r = 0;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 			bool line_status)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ba25765..c87c612 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -429,6 +429,92 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
 	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
 }
 
+/**
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
+ *
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently.  So, to avoid losing data, we keep the following order for
+ * each bit:
+ *
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Flush TLB's if needed.
+ *   4. Copy the snapshot to the userspace.
+ *
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry.  This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+	int r;
+	struct kvm_memory_slot *memslot;
+	unsigned long n, i;
+	unsigned long *dirty_bitmap;
+	unsigned long *dirty_bitmap_buffer;
+	bool is_dirty = false;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	memslot = id_to_memslot(kvm->memslots, log->slot);
+
+	dirty_bitmap = memslot->dirty_bitmap;
+	r = -ENOENT;
+	if (!dirty_bitmap)
+		goto out;
+
+	n = kvm_dirty_bitmap_bytes(memslot);
+
+	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+	memset(dirty_bitmap_buffer, 0, n);
+
+	spin_lock(&kvm->mmu_lock);
+
+	for (i = 0; i < n / sizeof(long); i++) {
+		unsigned long mask;
+		gfn_t offset;
+
+		if (!dirty_bitmap[i])
+			continue;
+
+		is_dirty = true;
+
+		mask = xchg(&dirty_bitmap[i], 0);
+		dirty_bitmap_buffer[i] = mask;
+
+		offset = i * BITS_PER_LONG;
+		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
+	}
+
+	spin_unlock(&kvm->mmu_lock);
+
+	/* See the comments in kvm_mmu_slot_remove_write_access(). */
+	lockdep_assert_held(&kvm->slots_lock);
+
+	/*
+	 * All the TLBs can be flushed out of mmu lock, see the comments in
+	 * kvm_mmu_slot_remove_write_access().
+	 */
+	if (is_dirty)
+		kvm_flush_remote_tlbs(kvm);
+
+	r = -EFAULT;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+		goto out;
+
+	r = 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
 #else  /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
 
 static int kvm_init_mmu_notifier(struct kvm *kvm)
-- 
1.7.9.5
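
For reference, userspace consumes this log through the existing
KVM_GET_DIRTY_LOG ioctl.  A minimal sketch, assuming vm_fd is an open
VM file descriptor and npages is the slot size in pages (error
handling omitted):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdlib.h>

	/* one bit per page, rounded up to whole longs */
	size_t nlongs = (npages + 8 * sizeof(long) - 1) / (8 * sizeof(long));
	unsigned long *bitmap = calloc(nlongs, sizeof(*bitmap));
	struct kvm_dirty_log log = {
		.slot = 0,		/* memslot to query */
		.dirty_bitmap = bitmap,
	};

	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) == 0) {
		/* set bits mark pages dirtied since the previous call */
	}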



Thread overview: 17+ messages
2014-06-04 21:11 [RESEND PATCH v7 3/4] arm: dirty log write protect management support Mario Smarduch
2014-06-05  6:55 ` Xiao Guangrong
2014-06-05 19:09   ` Mario Smarduch
2014-06-06  5:52     ` Xiao Guangrong
2014-06-06 17:36       ` Mario Smarduch
  -- strict thread matches above, loose matches on Subject: below --
2014-06-03 23:19 [PATCH v7 0/4] arm: dirty page logging support for ARMv7 Mario Smarduch
2014-06-06 17:33 ` [RESEND PATCH v7 3/4] arm: dirty log write protect management support Mario Smarduch
2014-06-08 12:05   ` Christoffer Dall
2014-06-10  1:47     ` Mario Smarduch
2014-06-10  9:22       ` Christoffer Dall
2014-06-10 18:08         ` Mario Smarduch
2014-06-11  7:03           ` Christoffer Dall
2014-06-12  3:02             ` Mario Smarduch
2014-06-18  1:41             ` Mario Smarduch
2014-07-03 15:04               ` Christoffer Dall
2014-07-04 16:29                 ` Paolo Bonzini
2014-07-17 16:00                   ` Mario Smarduch
2014-07-17 16:17                 ` Mario Smarduch
