From: Sean Christopherson <sean.j.christopherson@intel.com>
To: eric van tassell <Eric.VanTassell@amd.com>
Cc: kvm@vger.kernel.org, bp@alien8.de, hpa@zytor.com,
	mingo@redhat.com, jmattson@google.com, joro@8bytes.org,
	pbonzini@redhat.com, tglx@linutronix.de, vkuznets@redhat.com,
	wanpengli@tencent.com, x86@kernel.org, evantass@amd.com
Subject: Re: [Patch 2/4] KVM:SVM: Introduce set_spte_notify support
Date: Fri, 31 Jul 2020 13:25:02 -0700	[thread overview]
Message-ID: <20200731202502.GG31451@linux.intel.com> (raw)
In-Reply-To: <20200724235448.106142-3-Eric.VanTassell@amd.com>

On Fri, Jul 24, 2020 at 06:54:46PM -0500, eric van tassell wrote:
> Improve SEV guest startup time from O(n) to a constant by deferring
> guest page pinning until the pages are used to satisfy nested page faults.
> 
> Implement the code to do the pinning (sev_get_page) and the notifier
> sev_set_spte_notify().
> 
> Track the pinned pages with an xarray so they can be released during guest
> termination.

I like that SEV is trying to be a better citizen, but this is trading one
hack for another.

  - KVM goes through a lot of effort to ensure page faults don't need to
    allocate memory, and this throws all that effort out the window (see
    the sketch below).

  - Tracking all gfns in a separate database (from the MMU) is wasteful.

  - Having to wait to free pinned memory until the VM is destroyed is less
    than ideal.
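
To make the first point concrete, here's a simplified sketch of the pattern
the x86 MMU relies on.  This is illustrative only: mmu_topup_memory_caches()
mirrors the real helper, sketch_map_gpa() is a hypothetical stand-in, and the
real fault handlers are more involved.  The point is that sleepable
allocations are topped up before mmu_lock is taken, so the code that actually
installs SPTEs never calls the allocator; an xa_insert(..., GFP_ATOMIC) from
set_spte() runs inside that no-allocation region and can spuriously fail
under memory pressure.

static int sketch_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
{
	int r;

	/* Sleepable allocations happen here, outside of mmu_lock. */
	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	spin_lock(&vcpu->kvm->mmu_lock);
	/*
	 * Everything under mmu_lock consumes only pre-allocated objects.
	 * An xa_insert(..., GFP_ATOMIC) in this region reintroduces an
	 * allocation that can fail, and with it a new failure mode for
	 * the fault itself.
	 */
	r = sketch_map_gpa(vcpu, gpa, error_code);
	spin_unlock(&vcpu->kvm->mmu_lock);

	return r;
}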

More thoughts in the next patch.

> Signed-off-by: eric van tassell <Eric.VanTassell@amd.com>
> ---
>  arch/x86/kvm/svm/sev.c | 71 ++++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/svm/svm.c |  2 ++
>  arch/x86/kvm/svm/svm.h |  3 ++
>  3 files changed, 76 insertions(+)
> 
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index f7f1f4ecf08e..040ae4aa7c5a 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -184,6 +184,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  	sev->asid = asid;
>  	INIT_LIST_HEAD(&sev->regions_list);
>  
> +	xa_init(&sev->pages_xarray);
> +
>  	return 0;
>  
>  e_free:
> @@ -415,6 +417,42 @@ static unsigned long get_num_contig_pages(unsigned long idx,
>  	return pages;
>  }
>  
> +static int sev_get_page(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn)
> +{
> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +	struct xarray *xa = &sev->pages_xarray;
> +	struct page *page = pfn_to_page(pfn);
> +	int ret;
> +
> +	/* store page at index = gfn */
> +	ret = xa_insert(xa, gfn, page, GFP_ATOMIC);
> +	if (ret == -EBUSY) {
> +		/*
> +		 * If xa_insert returned -EBUSY, the  gfn was already associated
> +		 * with a struct page *.
> +		 */
> +		struct page *cur_page;
> +
> +		cur_page = xa_load(xa, gfn);
> +		/* If cur_page == page, no change is needed, so return 0 */
> +		if (cur_page == page)
> +			return 0;
> +
> +		/* Release the page that was stored at index = gfn */
> +		put_page(cur_page);
> +
> +		/* Return result of attempting to store page at index = gfn */
> +		ret = xa_err(xa_store(xa, gfn, page, GFP_ATOMIC));
> +	}
> +
> +	if (ret)
> +		return ret;
> +
> +	get_page(page);
> +
> +	return 0;
> +}
> +
>  static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  {
>  	unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
> @@ -1085,6 +1123,8 @@ void sev_vm_destroy(struct kvm *kvm)
>  	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
>  	struct list_head *head = &sev->regions_list;
>  	struct list_head *pos, *q;
> +	XA_STATE(xas, &sev->pages_xarray, 0);
> +	struct page *xa_page;
>  
>  	if (!sev_guest(kvm))
>  		return;
> @@ -1109,6 +1149,12 @@ void sev_vm_destroy(struct kvm *kvm)
>  		}
>  	}
>  
> +	/* Release each pinned page that SEV tracked in sev->pages_xarray. */
> +	xas_for_each(&xas, xa_page, ULONG_MAX) {
> +		put_page(xa_page);
> +	}
> +	xa_destroy(&sev->pages_xarray);
> +
>  	mutex_unlock(&kvm->lock);
>  
>  	sev_unbind_asid(kvm, sev->handle);
> @@ -1193,3 +1239,28 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
>  	svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
>  	vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
>  }
> +
> +int sev_set_spte_notify(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn,
> +			int level, bool mmio, u64 *spte)
> +{
> +	int rc;
> +
> +	if (!sev_guest(vcpu->kvm))
> +		return 0;
> +
> +	/* MMIO page contains the unencrypted data, no need to lock this page */
> +	if (mmio)

Rather than make this a generic set_spte() notify hook, I think it makes
more sense to have it specifically be a "pin_spte" style hook.  That way
the caller can skip mmio PFNs as well as flows that can't possibly be
relevant to SEV, e.g. the sync_page() flow.
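
Roughly what I have in mind, as a sketch only -- the struct, names, and call
site below are illustrative, not existing KVM code:

/*
 * Rough sketch of a narrower hook: called only when the fault path is
 * about to install a new, non-MMIO SPTE.
 */
struct sketch_pin_ops {
	void (*pin_spte)(struct kvm_vcpu *vcpu, gfn_t gfn, int level,
			 kvm_pfn_t pfn);
};

/*
 * Hypothetical caller in the direct fault path: MMIO and no-slot PFNs are
 * filtered out before this point, and sync_page() never gets here, so SEV
 * only sees PFNs it might actually need to pin.
 */
static void sketch_install_new_spte(struct kvm_vcpu *vcpu,
				    const struct sketch_pin_ops *ops,
				    gfn_t gfn, int level, kvm_pfn_t pfn)
{
	if (ops->pin_spte)
		ops->pin_spte(vcpu, gfn, level, pfn);

	/* ...compute and write the SPTE as usual... */
}

With that shape, the mmio parameter and the check below go away entirely.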

> +		return 0;
> +
> +	rc = sev_get_page(vcpu->kvm, gfn, pfn);
> +	if (rc)
> +		return rc;
> +
> +	/*
> +	 * Flush any cached lines of the page being added since "ownership" of
> +	 * it will be transferred from the host to an encrypted guest.
> +	 */
> +	clflush_cache_range(__va(pfn << PAGE_SHIFT), page_level_size(level));
> +
> +	return 0;
> +}
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 535ad311ad02..9b304c761a99 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -4130,6 +4130,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
>  	.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
>  
>  	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
> +
> +	.set_spte_notify = sev_set_spte_notify,
>  };
>  
>  static struct kvm_x86_init_ops svm_init_ops __initdata = {
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 121b198b51e9..8a5c01516c89 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -65,6 +65,7 @@ struct kvm_sev_info {
>  	int fd;			/* SEV device fd */
>  	unsigned long pages_locked; /* Number of pages locked */
>  	struct list_head regions_list;  /* List of registered regions */
> +	struct xarray pages_xarray; /* List of PFN locked */
>  };
>  
>  struct kvm_svm {
> @@ -488,5 +489,7 @@ int svm_unregister_enc_region(struct kvm *kvm,
>  void pre_sev_run(struct vcpu_svm *svm, int cpu);
>  int __init sev_hardware_setup(void);
>  void sev_hardware_teardown(void);
> +int sev_set_spte_notify(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn,
> +			int level, bool mmio, u64 *spte);
>  
>  #endif
> -- 
> 2.17.1
> 


Thread overview: 17+ messages
2020-07-24 23:54 [Patch 0/4] Defer page pinning for SEV guests until guest pages touched eric van tassell
2020-07-24 23:54 ` [Patch 1/4] KVM:MMU: Introduce the set_spte_notify() callback eric van tassell
2020-07-24 23:54 ` [Patch 2/4] KVM:SVM: Introduce set_spte_notify support eric van tassell
2020-07-31 20:25   ` Sean Christopherson [this message]
2020-08-02 20:53     ` Eric van Tassell
2020-08-03 16:27       ` Sean Christopherson
2020-08-19 16:03         ` Eric van Tassell
2020-08-19 16:05           ` Sean Christopherson
2020-08-20 17:05             ` Eric van Tassell
2020-08-20 23:59               ` Sean Christopherson
2020-08-21  0:36                 ` Eric van Tassell
2020-08-21 18:16                   ` Eric van Tassell
2020-07-24 23:54 ` [Patch 3/4] KVM:SVM: Pin sev_launch_update_data() pages via sev_get_page() eric van tassell
2020-07-31 20:40   ` Sean Christopherson
2020-08-02 23:55     ` Eric van Tassell
2020-08-19 16:20       ` Eric van Tassell
2020-07-24 23:54 ` [Patch 4/4] KVM:SVM: Remove struct enc_region and associated pinned page tracking eric van tassell
