From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <linux-kernel-owner@vger.kernel.org>
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1427891AbcBSLwE (ORCPT <rfc822;w@1wt.eu>);
	Fri, 19 Feb 2016 06:52:04 -0500
Received: from mail-wm0-f42.google.com ([74.125.82.42]:35693 "EHLO
	mail-wm0-f42.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1427313AbcBSLv7 (ORCPT
	<rfc822;linux-kernel@vger.kernel.org>);
	Fri, 19 Feb 2016 06:51:59 -0500
Subject: Re: [PATCH v3 07/11] KVM: page track: add notifier support
To: Xiao Guangrong <guangrong.xiao@linux.intel.com>
References: <1455449503-20993-1-git-send-email-guangrong.xiao@linux.intel.com>
 <1455449503-20993-8-git-send-email-guangrong.xiao@linux.intel.com>
Cc: gleb@kernel.org, mtosatti@redhat.com, kvm@vger.kernel.org,
        linux-kernel@vger.kernel.org, kai.huang@linux.intel.com,
        jike.song@intel.com
From: Paolo Bonzini <pbonzini@redhat.com>
X-Enigmail-Draft-Status: N1110
Message-ID: <56C701DC.40904@redhat.com>
Date: Fri, 19 Feb 2016 12:51:56 +0100
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101
 Thunderbird/38.5.0
MIME-Version: 1.0
In-Reply-To: <1455449503-20993-8-git-send-email-guangrong.xiao@linux.intel.com>
Content-Type: text/plain; charset=windows-1252
Content-Transfer-Encoding: 8bit
Sender: linux-kernel-owner@vger.kernel.org
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org


On 14/02/2016 12:31, Xiao Guangrong wrote:
> Notifier list is introduced so that any node wants to receive the track
> event can register to the list
> 
> Two APIs are introduced here:
> - kvm_page_track_register_notifier(): register the notifier to receive
>   track event
> 
> - kvm_page_track_unregister_notifier(): stop receiving track event by
>   unregister the notifier
> 
> The callback, node->track_write() is called when a write access on the
> write tracked page happens
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h       |  1 +
>  arch/x86/include/asm/kvm_page_track.h | 39 ++++++++++++++++++++
>  arch/x86/kvm/page_track.c             | 67 +++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.c                    |  4 +++
>  4 files changed, 111 insertions(+)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d8931d0..282bc2f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -696,6 +696,7 @@ struct kvm_arch {
>  	 */
>  	struct list_head active_mmu_pages;
>  	struct list_head zapped_obsolete_pages;
> +	struct kvm_page_track_notifier_head track_notifier_head;
>  
>  	struct list_head assigned_dev_head;
>  	struct iommu_domain *iommu_domain;
> diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
> index 97ac9c3..1aae4ef 100644
> --- a/arch/x86/include/asm/kvm_page_track.h
> +++ b/arch/x86/include/asm/kvm_page_track.h
> @@ -6,6 +6,36 @@ enum kvm_page_track_mode {
>  	KVM_PAGE_TRACK_MAX,
>  };
>  
> +/*
> + * The notifier represented by @kvm_page_track_notifier_node is linked into
> + * the head which will be notified when guest is triggering the track event.
> + *
> + * Write access on the head is protected by kvm->mmu_lock, read access
> + * is protected by track_srcu.
> + */
> +struct kvm_page_track_notifier_head {
> +	struct srcu_struct track_srcu;
> +	struct hlist_head track_notifier_list;
> +};
> +
> +struct kvm_page_track_notifier_node {
> +	struct hlist_node node;
> +
> +	/*
> +	 * It is called when guest is writing the write-tracked page
> +	 * and write emulation is finished at that time.
> +	 *
> +	 * @vcpu: the vcpu where the write access happened.
> +	 * @gpa: the physical address written by guest.
> +	 * @new: the data was written to the address.
> +	 * @bytes: the written length.
> +	 */
> +	void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
> +			    int bytes);
> +};
> +
> +void kvm_page_track_init(struct kvm *kvm);
> +
>  void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
>  				 struct kvm_memory_slot *dont);
>  int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
> @@ -25,4 +55,13 @@ void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn,
>  				enum kvm_page_track_mode mode);
>  bool kvm_page_track_check_mode(struct kvm_vcpu *vcpu, gfn_t gfn,
>  			       enum kvm_page_track_mode mode);
> +
> +void
> +kvm_page_track_register_notifier(struct kvm *kvm,
> +				 struct kvm_page_track_notifier_node *n);
> +void
> +kvm_page_track_unregister_notifier(struct kvm *kvm,
> +				   struct kvm_page_track_notifier_node *n);
> +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
> +			  int bytes);
>  #endif
> diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
> index de9b32f..0692cc6 100644
> --- a/arch/x86/kvm/page_track.c
> +++ b/arch/x86/kvm/page_track.c
> @@ -188,3 +188,70 @@ bool kvm_page_track_check_mode(struct kvm_vcpu *vcpu, gfn_t gfn,
>  
>  	return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]);
>  }
> +
> +void kvm_page_track_init(struct kvm *kvm)
> +{
> +	struct kvm_page_track_notifier_head *head;
> +
> +	head = &kvm->arch.track_notifier_head;
> +	init_srcu_struct(&head->track_srcu);
> +	INIT_HLIST_HEAD(&head->track_notifier_list);
> +}
> +
> +/*
> + * register the notifier so that event interception for the tracked guest
> + * pages can be received.
> + */
> +void
> +kvm_page_track_register_notifier(struct kvm *kvm,
> +				 struct kvm_page_track_notifier_node *n)
> +{
> +	struct kvm_page_track_notifier_head *head;
> +
> +	head = &kvm->arch.track_notifier_head;
> +
> +	spin_lock(&kvm->mmu_lock);
> +	hlist_add_head_rcu(&n->node, &head->track_notifier_list);
> +	spin_unlock(&kvm->mmu_lock);
> +}
> +
> +/*
> + * stop receiving the event interception. It is the opposed operation of
> + * kvm_page_track_register_notifier().
> + */
> +void
> +kvm_page_track_unregister_notifier(struct kvm *kvm,
> +				   struct kvm_page_track_notifier_node *n)
> +{
> +	struct kvm_page_track_notifier_head *head;
> +
> +	head = &kvm->arch.track_notifier_head;
> +
> +	spin_lock(&kvm->mmu_lock);
> +	hlist_del_rcu(&n->node);
> +	spin_unlock(&kvm->mmu_lock);
> +	synchronize_srcu(&head->track_srcu);
> +}
> +
> +/*
> + * Notify the node that write access is intercepted and write emulation is
> + * finished at this time.
> + *
> + * The node should figure out if the written page is the one that node is
> + * interested in by itself.
> + */
> +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
> +			  int bytes)
> +{
> +	struct kvm_page_track_notifier_head *head;
> +	struct kvm_page_track_notifier_node *n;
> +	int idx;
> +
> +	head = &vcpu->kvm->arch.track_notifier_head;

Please check outside SRCU if the notifier list is empty.  If so, there
is no need to do the (relatively) expensive srcu_read_lock/unlock.

Paolo

> +	idx = srcu_read_lock(&head->track_srcu);
> +	hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
> +		if (n->track_write)
> +			n->track_write(vcpu, gpa, new, bytes);
> +	srcu_read_unlock(&head->track_srcu, idx);
> +}
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e25ebb7..98019b6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4370,6 +4370,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
>  	if (ret < 0)
>  		return 0;

A kvm_vcpu_mark_page_dirty is missing here, isn't it?  I can take care
of it, but it would be great if you double-checked this.  If so, that
should be fixed in stable kernels too.

Can you add a kvm_vcpu_note_page_write(vcpu, gpa, val, bytes) function
that takes care of calling kvm_vcpu_mark_page_dirty, kvm_mmu_pte_write
and kvm_page_track_write?

Thanks,

Paolo

>  	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
> +	kvm_page_track_write(vcpu, gpa, val, bytes);
>  	return 1;
>  }
>  
> @@ -4628,6 +4629,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
>  
>  	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
>  	kvm_mmu_pte_write(vcpu, gpa, new, bytes);
> +	kvm_page_track_write(vcpu, gpa, new, bytes);
>  
>  	return X86EMUL_CONTINUE;
>  
> @@ -7748,6 +7750,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  	INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
>  	INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
>  
> +	kvm_page_track_init(kvm);
> +
>  	return 0;
>  }
>  
>