From: Izik Eidus <ieidus@redhat.com>
To: Marcelo Tosatti <mtosatti@redhat.com>
Cc: kvm@vger.kernel.org, avi@redhat.com
Subject: Re: [PATCH 2/2] kvm: change the dirty page tracking to work with dirty bity
Date: Thu, 11 Jun 2009 02:57:02 +0300	[thread overview]
Message-ID: <4A30484E.1040708@redhat.com> (raw)
In-Reply-To: <20090610204227.GA12682@amt.cnet>

Marcelo Tosatti wrote:
> On Wed, Jun 10, 2009 at 07:23:25PM +0300, Izik Eidus wrote:
>   
>> Change the dirty page tracking to work with the dirty bit instead of page faults.
>> Right now dirty page tracking works with the help of page faults: when we
>> want to track a page for being dirty, we write protect it and mark it dirty
>> when we get a write page fault. This code moves to looking at the dirty bit
>> of the spte.
>>
>> Signed-off-by: Izik Eidus <ieidus@redhat.com>
>> ---
>>  arch/ia64/kvm/kvm-ia64.c        |    4 +++
>>  arch/powerpc/kvm/powerpc.c      |    4 +++
>>  arch/s390/kvm/kvm-s390.c        |    4 +++
>>  arch/x86/include/asm/kvm_host.h |    3 ++
>>  arch/x86/kvm/mmu.c              |   42 ++++++++++++++++++++++++++++++++++++--
>>  arch/x86/kvm/svm.c              |    7 ++++++
>>  arch/x86/kvm/vmx.c              |    7 ++++++
>>  arch/x86/kvm/x86.c              |   26 ++++++++++++++++++++---
>>  include/linux/kvm_host.h        |    1 +
>>  virt/kvm/kvm_main.c             |    6 ++++-
>>  10 files changed, 96 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
>> index 3199221..5914128 100644
>> --- a/arch/ia64/kvm/kvm-ia64.c
>> +++ b/arch/ia64/kvm/kvm-ia64.c
>> @@ -1809,6 +1809,10 @@ void kvm_arch_exit(void)
>>  	kvm_vmm_info = NULL;
>>  }
>>  
>> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
>> +{
>> +}
>> +
>>  static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
>>  		struct kvm_dirty_log *log)
>>  {
>> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
>> index 2cf915e..6beb368 100644
>> --- a/arch/powerpc/kvm/powerpc.c
>> +++ b/arch/powerpc/kvm/powerpc.c
>> @@ -418,6 +418,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
>>  	return -ENOTSUPP;
>>  }
>>  
>
> #ifndef KVM_ARCH_HAVE_DIRTY_LOG
>> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
>> +{
>> +}
>> +
> #endif
>
> in virt/kvm/main.c
>
>> index c7b0cc2..8a24149 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -527,6 +527,7 @@ struct kvm_x86_ops {
>>  	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>>  	int (*get_tdp_level)(void);
>>  	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
>> +	int (*dirty_bit_support)(void);
>>  };
>>  
>>  extern struct kvm_x86_ops *kvm_x86_ops;
>> @@ -796,4 +797,6 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
>>  int kvm_age_hva(struct kvm *kvm, unsigned long hva);
>>  int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
>>  
>> +int is_dirty_and_clean_rmapp(struct kvm *kvm, unsigned long *rmapp);
>> +
>>  #endif /* _ASM_X86_KVM_HOST_H */
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 809cce0..500e0e2 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -140,6 +140,8 @@ module_param(oos_shadow, bool, 0644);
>>  #define ACC_USER_MASK    PT_USER_MASK
>>  #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
>>  
>> +#define SPTE_DONT_DIRTY (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
>> +
>>  #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
>>  
>>  struct kvm_rmap_desc {
>> @@ -629,6 +631,29 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
>>  	return NULL;
>>  }
>>  
>> +int is_dirty_and_clean_rmapp(struct kvm *kvm, unsigned long *rmapp)
>> +{
>> +	u64 *spte;
>> +	int dirty = 0;
>> +
>> +	if (!shadow_dirty_mask)
>> +		return 0;
>> +
>> +	spte = rmap_next(kvm, rmapp, NULL);
>> +	while (spte) {
>> +		if (*spte & PT_DIRTY_MASK) {
>> +			set_shadow_pte(spte, (*spte &= ~PT_DIRTY_MASK) |
>> +				       SPTE_DONT_DIRTY);
>> +			dirty = 1;
>> +			break;
>> +		}
>> +		spte = rmap_next(kvm, rmapp, spte);
>> +	}
>> +
>> +	return dirty;
>> +}
>> +
>> +
>>  static int rmap_write_protect(struct kvm *kvm, u64 gfn)
>>  {
>>  	unsigned long *rmapp;
>> @@ -1381,11 +1406,17 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>>  static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>>  {
>>  	int ret;
>> +	int i;
>> +
>>  	++kvm->stat.mmu_shadow_zapped;
>>  	ret = mmu_zap_unsync_children(kvm, sp);
>>  	kvm_mmu_page_unlink_children(kvm, sp);
>>  	kvm_mmu_unlink_parents(kvm, sp);
>>  	kvm_flush_remote_tlbs(kvm);
>> +	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
>> +		if (sp->spt[i] & PT_DIRTY_MASK)
>> +			mark_page_dirty(kvm, sp->gfns[i]);
>> +	}
>>     
>
> Also need to transfer dirty bit in other places probably.
>   


Yes, I can think of some other cases, but maybe I can avoid them using 
some trick.


>   
>>  	if (!sp->role.invalid && !sp->role.direct)
>>  		unaccount_shadowed(kvm, sp->gfn);
>>  	if (sp->unsync)
>> @@ -1676,7 +1707,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
>>  	 * whether the guest actually used the pte (in order to detect
>>  	 * demand paging).
>>  	 */
>> -	spte = shadow_base_present_pte | shadow_dirty_mask;
>> +	spte = shadow_base_present_pte;
>> +	if (!(spte & SPTE_DONT_DIRTY))
>> +		spte |= shadow_dirty_mask;
>> +
>>  	if (!speculative)
>>  		spte |= shadow_accessed_mask;
>>  	if (!dirty)
>> @@ -1725,8 +1759,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
>>  		}
>>  	}
>>  
>> -	if (pte_access & ACC_WRITE_MASK)
>> -		mark_page_dirty(vcpu->kvm, gfn);
>> +	if (!shadow_dirty_mask) {
>> +		if (pte_access & ACC_WRITE_MASK)
>> +			mark_page_dirty(vcpu->kvm, gfn);
>> +	}
>>     
>
> Can you avoid the mark_page_dirty if SPTE_DONT_DIRTY is set? (which would be
> a good idea; gfn_to_memslot_unaliased and friends show up high in profiling).
>   

This code shouldn't run on anything but EPT; shadow_dirty_mask should be 
set to zero only on EPT, which doesn't have dirty bit tracking, so we still 
need to mark the page dirty from this context...
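
To make the contrast concrete, here is a minimal userspace sketch (not the
kernel code; the DIRTY_BIT value, the fixed-size spte array and the bitmap
below are made-up stand-ins) of what the dirty-bit path does at
GET_DIRTY_LOG time, as opposed to the page-fault path where
mark_page_dirty() is called from set_spte():

#include <stdint.h>
#include <stdio.h>

#define DIRTY_BIT	(1ULL << 6)	/* stand-in for the hardware dirty bit */
#define NPAGES		8

static uint64_t spte[NPAGES];		/* one fake spte per guest page   */
static unsigned long dirty_bitmap;	/* one bit per page, NPAGES <= 64 */

/* Harvest: if the hardware set the dirty bit, record the page as dirty
 * in the bitmap and clear the bit so the next write is noticed again. */
static void harvest_dirty_bits(void)
{
	int i;

	for (i = 0; i < NPAGES; i++) {
		if (spte[i] & DIRTY_BIT) {
			dirty_bitmap |= 1UL << i;
			spte[i] &= ~DIRTY_BIT;
		}
	}
}

int main(void)
{
	spte[2] |= DIRTY_BIT;	/* pretend the guest wrote to pages 2 and 5 */
	spte[5] |= DIRTY_BIT;

	harvest_dirty_bits();
	printf("dirty bitmap: 0x%lx\n", dirty_bitmap);	/* prints 0x24 */
	return 0;
}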
>   
>>  set_pte:
>>  	set_shadow_pte(shadow_pte, spte);
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 37397f6..5b6351e 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -2724,6 +2724,11 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
>>  	return 0;
>>  }
>>  
>> +static int svm_dirty_bit_support(void)
>> +{
>> +	return 1;
>> +}
>> +
>>  static struct kvm_x86_ops svm_x86_ops = {
>>  	.cpu_has_kvm_support = has_svm,
>>  	.disabled_by_bios = is_disabled,
>> @@ -2785,6 +2790,8 @@ static struct kvm_x86_ops svm_x86_ops = {
>>  	.set_tss_addr = svm_set_tss_addr,
>>  	.get_tdp_level = get_npt_level,
>>  	.get_mt_mask = svm_get_mt_mask,
>> +
>> +	.dirty_bit_support = svm_dirty_bit_support,
>>  };
>>  
>>  static int __init svm_init(void)
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 673bcb3..8903314 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -3774,6 +3774,11 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
>>  	return ret;
>>  }
>>  
>> +static int vmx_dirty_bit_support(void)
>> +{
>> +	return false;
>> +}
>> +
>>  static struct kvm_x86_ops vmx_x86_ops = {
>>  	.cpu_has_kvm_support = cpu_has_kvm_support,
>>  	.disabled_by_bios = vmx_disabled_by_bios,
>> @@ -3833,6 +3838,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
>>  	.set_tss_addr = vmx_set_tss_addr,
>>  	.get_tdp_level = get_ept_level,
>>  	.get_mt_mask = vmx_get_mt_mask,
>> +
>> +	.dirty_bit_support = vmx_dirty_bit_support,
>>  };
>>  
>>  static int __init vmx_init(void)
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 272e2e8..e06387c 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -1963,6 +1963,20 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
>>  	return 0;
>>  }
>>  
>> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
>> +{
>> +	int i;
>> +
>> +	spin_lock(&kvm->mmu_lock);
>> +	for (i = 0; i < memslot->npages; ++i) {
>> +		if (!test_bit(i, memslot->dirty_bitmap)) {
>> +			if (is_dirty_and_clean_rmapp(kvm, &memslot->rmap[i]))
>> +				set_bit(i, memslot->dirty_bitmap);
>> +		}
>> +	}
>> +	spin_unlock(&kvm->mmu_lock);
>> +}
>> +
>>  /*
>>   * Get (and clear) the dirty memory log for a memory slot.
>>   */
>> @@ -1982,10 +1996,14 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>>  
>>  	/* If nothing is dirty, don't bother messing with page tables. */
>>  	if (is_dirty) {
>> -		spin_lock(&kvm->mmu_lock);
>> -		kvm_mmu_slot_remove_write_access(kvm, log->slot);
>> -		spin_unlock(&kvm->mmu_lock);
>> -		kvm_flush_remote_tlbs(kvm);
>> +		if (!kvm_x86_ops->dirty_bit_support()) {
>> +			spin_lock(&kvm->mmu_lock);
>> +			/*  remove_write_access() flush the tlb */
>> +			kvm_mmu_slot_remove_write_access(kvm, log->slot);
>> +			spin_unlock(&kvm->mmu_lock);
>> +		} else {
>> +			kvm_flush_remote_tlbs(kvm);
>> +		}
>>  		memslot = &kvm->memslots[log->slot];
>>  		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
>>  		memset(memslot->dirty_bitmap, 0, n);
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index cdf1279..d1657a3 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -250,6 +250,7 @@ int kvm_dev_ioctl_check_extension(long ext);
>>  
>>  int kvm_get_dirty_log(struct kvm *kvm,
>>  			struct kvm_dirty_log *log, int *is_dirty);
>> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);
>>  int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>>  				struct kvm_dirty_log *log);
>>  
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 3046e9c..a876231 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -1135,8 +1135,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
>>  	}
>>  
>>  	/* Free page dirty bitmap if unneeded */
>> -	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
>> +	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) {
>>  		new.dirty_bitmap = NULL;
>> +		if (old.flags & KVM_MEM_LOG_DIRTY_PAGES)
>> +			kvm_arch_flush_shadow(kvm);
>> +	}
>>     
>
> Whats this for?
>   

We have added all this SPTE_DONT_DIRTY handling...; when we stop dirty bit 
tracking, we want to go back to setting the dirty bit for the spte inside 
set_spte(), so writing to the page will be faster....
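
A hedged sketch of that decision (the flag name matches the real
KVM_MEM_LOG_DIRTY_PAGES; the helper is simplified and only illustrates the
condition the hunk above tests):

#include <stdbool.h>

#define LOG_DIRTY_PAGES 0x1u	/* stands in for KVM_MEM_LOG_DIRTY_PAGES */

/*
 * While logging is on, sptes are installed with the dirty bit withheld so
 * the hardware bit can be harvested later.  Once logging stops, dropping
 * the shadow pages lets set_spte() rebuild them with the dirty bit
 * pre-set, so guest writes are not slowed down anymore.
 */
static bool must_flush_shadow(unsigned int old_flags, unsigned int new_flags)
{
	return (old_flags & LOG_DIRTY_PAGES) && !(new_flags & LOG_DIRTY_PAGES);
}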

> The idea of making the dirty bit accessible (it can also be used to map host
> ptes read-only when the guest fault is read-only, for KSM (*)) is good. But
> it is better to first introduce infrastructure to handle the dirty bit
> (making sure the bit is transferred properly), then have logging make use of it.
>   

???, I don't quite understand; do you mean you want me to keep going in the 
direction of this patch, or do you want some other approach?

> * EPT violations do not transfer fault information to the page fault
> handler, but it's available (there's a vm-exit field).
>   

EPT, in this patch, runs as before (using page faults with 
mark_page_dirty() inside set_spte()).
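
To spell out the write-side split the patch is aiming for, here is a
simplified sketch (install_spte_sketch and its parameters are stand-ins,
not kernel functions; only shadow_dirty_mask and the commented
mark_page_dirty() correspond to names in the patch):

#include <stdint.h>

/* shadow_dirty_mask == 0 models EPT without dirty-bit support. */
static void install_spte_sketch(uint64_t *sptep, uint64_t base,
				int writable, uint64_t shadow_dirty_mask,
				int dont_dirty)
{
	uint64_t spte = base;

	if (shadow_dirty_mask) {
		/* Hardware dirty bit available (shadow paging / NPT):
		 * pre-set it unless dirty logging asked for it to be
		 * withheld, so the CPU records the first write and it
		 * can be harvested later. */
		if (!dont_dirty)
			spte |= shadow_dirty_mask;
	} else if (writable) {
		/* EPT without a dirty bit: keep the old behaviour and
		 * mark the page dirty here, at write-fault time, e.g.
		 * mark_page_dirty(kvm, gfn) in the real code. */
	}

	*sptep = spte;
}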

>   
>>  	r = -ENOMEM;
>>  
>> @@ -1279,6 +1282,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
>>  	if (!memslot->dirty_bitmap)
>>  		goto out;
>>  
>> +	kvm_arch_get_dirty_log(kvm, memslot);
>>  	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
>>  
>>  	for (i = 0; !any && i < n/sizeof(long); ++i)
>> -- 
>> 1.5.6.5
>>


Thanks :).

Thread overview: 21+ messages
2009-06-10 16:23 [PATCH 0/2] RFC use dirty bit for page dirty tracking (v2) Izik Eidus
2009-06-10 16:23 ` [PATCH 1/2] kvm: fix dirty bit tracking for slots with large pages Izik Eidus
2009-06-10 16:23   ` [PATCH 2/2] kvm: change the dirty page tracking to work with dirty bity Izik Eidus
2009-06-10 17:00     ` Izik Eidus
2009-06-10 20:42     ` Marcelo Tosatti
2009-06-10 23:57       ` Izik Eidus [this message]
2009-06-10 23:59         ` Izik Eidus
2009-06-11  1:04         ` Marcelo Tosatti
2009-06-11 11:27           ` Izik Eidus
2009-06-11 12:24             ` Marcelo Tosatti
2009-06-11 15:49               ` Izik Eidus
2009-06-11  8:24     ` Ulrich Drepper
2009-06-11  9:44       ` Izik Eidus
2009-06-11  9:33     ` Avi Kivity
2009-06-11  9:48       ` Izik Eidus
2009-06-11  9:53         ` Avi Kivity
2009-06-14 11:10   ` [PATCH 1/2] kvm: fix dirty bit tracking for slots with large pages Avi Kivity
2009-06-15 14:58 ` [PATCH 0/2] RFC use dirty bit for page dirty tracking (v2) Ryan Harper
2009-06-15 15:19   ` Avi Kivity
2009-07-07  3:32     ` Ryan Harper
2009-07-07  5:11       ` Avi Kivity
