xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: "Tian, Kevin" <kevin.tian@intel.com>
To: Jan Beulich <jbeulich@suse.com>,
	"xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>
Cc: "Cooper, Andrew" <andrew.cooper3@citrix.com>,
	"Wei Liu" <wl@xen.org>, "Roger Pau Monné" <roger.pau@citrix.com>,
	"George Dunlap" <george.dunlap@citrix.com>,
	"Nakajima, Jun" <jun.nakajima@intel.com>,
	"Tim Deegan" <tim@xen.org>
Subject: RE: [PATCH v4 1/3] VMX: use a single, global APIC access page
Date: Sun, 25 Apr 2021 01:27:22 +0000	[thread overview]
Message-ID: <MWHPR11MB18863486197C197C576A8E9B8C439@MWHPR11MB1886.namprd11.prod.outlook.com> (raw)
In-Reply-To: <837a427a-a74e-00e9-70c7-1d3cfa28b30c@suse.com>

> From: Jan Beulich <jbeulich@suse.com>
> Sent: Friday, April 23, 2021 6:53 PM
> 
> The address of this page is used by the CPU only to recognize when to
> access the virtual APIC page instead. No accesses would ever go to this
> page. It only needs to be present in the (CPU) page tables so that
> address translation will produce its address as result for respective
> accesses.
> 
> By making this page global, we also eliminate the need to refcount it,
> or to assign it to any domain in the first place.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> ---
> v5: Init apic_access_mfn to INVALID_MFN. Move assignment out of if()
>     condition. Introduce page_suppress_refcounting() and
>     page_refcounting_suppressed().
> v4: Set PGC_extra on the page. Make shadow mode work.
> v3: Split p2m insertion change to a separate patch.
> v2: Avoid insertion when !has_vlapic(). Split off change to
>     p2m_get_iommu_flags().
> ---
> I did further consider not allocating any real page at all, but just
> using the address of some unpopulated space (which would require
> announcing this page as reserved to Dom0, so it wouldn't put any PCI
> MMIO BARs there). But I thought this would be too controversial, because
> of the possible risks associated with this.
> 
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -66,8 +66,7 @@ boolean_param("force-ept", opt_force_ept
>  static void vmx_ctxt_switch_from(struct vcpu *v);
>  static void vmx_ctxt_switch_to(struct vcpu *v);
> 
> -static int  vmx_alloc_vlapic_mapping(struct domain *d);
> -static void vmx_free_vlapic_mapping(struct domain *d);
> +static int alloc_vlapic_mapping(void);
>  static void vmx_install_vlapic_mapping(struct vcpu *v);
>  static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
>                                  unsigned int flags);
> @@ -78,6 +77,8 @@ static int vmx_msr_read_intercept(unsign
>  static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
>  static void vmx_invlpg(struct vcpu *v, unsigned long linear);
> 
> +static mfn_t __read_mostly apic_access_mfn = INVALID_MFN_INITIALIZER;
> +
>  /* Values for domain's ->arch.hvm_domain.pi_ops.flags. */
>  #define PI_CSW_FROM (1u << 0)
>  #define PI_CSW_TO   (1u << 1)
> @@ -401,7 +402,6 @@ static int vmx_domain_initialise(struct
>          .to   = vmx_ctxt_switch_to,
>          .tail = vmx_do_resume,
>      };
> -    int rc;
> 
>      d->arch.ctxt_switch = &csw;
> 
> @@ -411,28 +411,22 @@ static int vmx_domain_initialise(struct
>       */
>      d->arch.hvm.vmx.exec_sp = is_hardware_domain(d) || opt_ept_exec_sp;
> 
> -    if ( !has_vlapic(d) )
> -        return 0;
> -
> -    if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
> -        return rc;
> -
>      return 0;
>  }
> 
> -static void vmx_domain_relinquish_resources(struct domain *d)
> +static void domain_creation_finished(struct domain *d)
>  {
> -    if ( !has_vlapic(d) )
> +    gfn_t gfn = gaddr_to_gfn(APIC_DEFAULT_PHYS_BASE);
> +    uint8_t ipat;
> +
> +    if ( !has_vlapic(d) || mfn_eq(apic_access_mfn, INVALID_MFN) )
>          return;
> 
> -    vmx_free_vlapic_mapping(d);
> -}
> +    ASSERT(epte_get_entry_emt(d, gfn_x(gfn), apic_access_mfn, 0, &ipat,
> +                              true) == MTRR_TYPE_WRBACK);
> +    ASSERT(ipat);
> 
> -static void domain_creation_finished(struct domain *d)
> -{
> -    if ( has_vlapic(d) && !mfn_eq(d->arch.hvm.vmx.apic_access_mfn,
> _mfn(0)) &&
> -         set_mmio_p2m_entry(d, gaddr_to_gfn(APIC_DEFAULT_PHYS_BASE),
> -                            d->arch.hvm.vmx.apic_access_mfn, PAGE_ORDER_4K) )
> +    if ( set_mmio_p2m_entry(d, gfn, apic_access_mfn, PAGE_ORDER_4K) )
>          domain_crash(d);
>  }
> 
> @@ -2415,7 +2409,6 @@ static struct hvm_function_table __initd
>      .cpu_up_prepare       = vmx_cpu_up_prepare,
>      .cpu_dead             = vmx_cpu_dead,
>      .domain_initialise    = vmx_domain_initialise,
> -    .domain_relinquish_resources = vmx_domain_relinquish_resources,
>      .domain_creation_finished = domain_creation_finished,
>      .vcpu_initialise      = vmx_vcpu_initialise,
>      .vcpu_destroy         = vmx_vcpu_destroy,
> @@ -2662,7 +2655,7 @@ const struct hvm_function_table * __init
>  {
>      set_in_cr4(X86_CR4_VMXE);
> 
> -    if ( vmx_vmcs_init() )
> +    if ( vmx_vmcs_init() || alloc_vlapic_mapping() )
>      {
>          printk("VMX: failed to initialise.\n");
>          return NULL;
> @@ -3224,7 +3217,7 @@ gp_fault:
>      return X86EMUL_EXCEPTION;
>  }
> 
> -static int vmx_alloc_vlapic_mapping(struct domain *d)
> +static int __init alloc_vlapic_mapping(void)
>  {
>      struct page_info *pg;
>      mfn_t mfn;
> @@ -3232,52 +3225,34 @@ static int vmx_alloc_vlapic_mapping(stru
>      if ( !cpu_has_vmx_virtualize_apic_accesses )
>          return 0;
> 
> -    pg = alloc_domheap_page(d, MEMF_no_refcount);
> +    pg = alloc_domheap_page(NULL, 0);
>      if ( !pg )
>          return -ENOMEM;
> 
> -    if ( !get_page_and_type(pg, d, PGT_writable_page) )
> -    {
> -        /*
> -         * The domain can't possibly know about this page yet, so failure
> -         * here is a clear indication of something fishy going on.
> -         */
> -        domain_crash(d);
> -        return -ENODATA;
> -    }
> +    /*
> +     * Signal to shadow code that this page cannot be refcounted. This also
> +     * makes epte_get_entry_emt() recognize this page as "special".
> +     */
> +    page_suppress_refcounting(pg);
> 
>      mfn = page_to_mfn(pg);
>      clear_domain_page(mfn);
> -    d->arch.hvm.vmx.apic_access_mfn = mfn;
> +    apic_access_mfn = mfn;
> 
>      return 0;
>  }
> 
> -static void vmx_free_vlapic_mapping(struct domain *d)
> -{
> -    mfn_t mfn = d->arch.hvm.vmx.apic_access_mfn;
> -
> -    d->arch.hvm.vmx.apic_access_mfn = _mfn(0);
> -    if ( !mfn_eq(mfn, _mfn(0)) )
> -    {
> -        struct page_info *pg = mfn_to_page(mfn);
> -
> -        put_page_alloc_ref(pg);
> -        put_page_and_type(pg);
> -    }
> -}
> -
>  static void vmx_install_vlapic_mapping(struct vcpu *v)
>  {
>      paddr_t virt_page_ma, apic_page_ma;
> 
> -    if ( mfn_eq(v->domain->arch.hvm.vmx.apic_access_mfn, _mfn(0)) )
> +    if ( !has_vlapic(v->domain) || mfn_eq(apic_access_mfn, INVALID_MFN) )
>          return;
> 
>      ASSERT(cpu_has_vmx_virtualize_apic_accesses);
> 
>      virt_page_ma = page_to_maddr(vcpu_vlapic(v)->regs_page);
> -    apic_page_ma = mfn_to_maddr(v->domain-
> >arch.hvm.vmx.apic_access_mfn);
> +    apic_page_ma = mfn_to_maddr(apic_access_mfn);
> 
>      vmx_vmcs_enter(v);
>      __vmwrite(VIRTUAL_APIC_PAGE_ADDR, virt_page_ma);
> --- a/xen/arch/x86/mm/shadow/set.c
> +++ b/xen/arch/x86/mm/shadow/set.c
> @@ -94,6 +94,15 @@ shadow_get_page_from_l1e(shadow_l1e_t sl
>      ASSERT(!sh_l1e_is_magic(sl1e));
>      ASSERT(shadow_mode_refcounts(d));
> 
> +    /*
> +     * Check whether refcounting is suppressed on this page. For example,
> +     * VMX'es APIC access MFN is just a surrogate page.  It doesn't actually
> +     * get accessed, and hence there's no need to refcount it.
> +     */
> +    mfn = shadow_l1e_get_mfn(sl1e);
> +    if ( mfn_valid(mfn) &&
> page_refcounting_suppressed(mfn_to_page(mfn)) )
> +        return 0;
> +
>      res = get_page_from_l1e(sl1e, d, d);
> 
>      /*
> --- a/xen/arch/x86/mm/shadow/types.h
> +++ b/xen/arch/x86/mm/shadow/types.h
> @@ -276,9 +276,16 @@ int shadow_set_l4e(struct domain *d, sha
>  static void inline
>  shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
>  {
> +    mfn_t mfn;
> +
>      if ( !shadow_mode_refcounts(d) )
>          return;
> 
> +    if ( mfn_valid(mfn = shadow_l1e_get_mfn(sl1e)) &&
> +         /* See the respective comment in shadow_get_page_from_l1e(). */
> +         page_refcounting_suppressed(mfn_to_page(mfn)) )
> +        return;
> +
>      put_page_from_l1e(sl1e, d);
>  }
> 
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -58,7 +58,6 @@ struct ept_data {
>  #define _VMX_DOMAIN_PML_ENABLED    0
>  #define VMX_DOMAIN_PML_ENABLED     (1ul <<
> _VMX_DOMAIN_PML_ENABLED)
>  struct vmx_domain {
> -    mfn_t apic_access_mfn;
>      /* VMX_DOMAIN_* */
>      unsigned int status;
> 
> --- a/xen/include/asm-x86/mm.h
> +++ b/xen/include/asm-x86/mm.h
> @@ -82,7 +82,7 @@
>  #define PGC_state_offlined PG_mask(2, 9)
>  #define PGC_state_free    PG_mask(3, 9)
>  #define page_state_is(pg, st) (((pg)->count_info&PGC_state) ==
> PGC_state_##st)
> -/* Page is not reference counted */
> +/* Page is not reference counted (see below for caveats) */
>  #define _PGC_extra        PG_shift(10)
>  #define PGC_extra         PG_mask(1, 10)
> 
> @@ -374,6 +374,24 @@ void zap_ro_mpt(mfn_t mfn);
> 
>  bool is_iomem_page(mfn_t mfn);
> 
> +/*
> + * Pages with no owner which may get passed to functions wanting to
> + * refcount them can be marked PGC_extra to bypass this refcounting
> (which
> + * would fail due to the lack of an owner).
> + *
> + * (For pages with owner PGC_extra has different meaning.)
> + */
> +static inline void page_suppress_refcounting(struct page_info *pg)
> +{
> +   ASSERT(!page_get_owner(pg));
> +   pg->count_info |= PGC_extra;
> +}
> +
> +static inline bool page_refcounting_suppressed(const struct page_info *pg)
> +{
> +    return !page_get_owner(pg) && (pg->count_info & PGC_extra);
> +}
> +
>  struct platform_bad_page {
>      unsigned long mfn;
>      unsigned int order;


  parent reply	other threads:[~2021-04-25  1:27 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-22 10:55 [PATCH v3 0/2] VMX: apic access page handling adjustments Jan Beulich
2021-02-22 10:56 ` [PATCH v3 1/2][4.15] VMX: delay p2m insertion of APIC access page Jan Beulich
2021-02-22 11:25   ` Ian Jackson
2021-02-22 14:05     ` Jan Beulich
2021-02-22 17:17       ` Ian Jackson
2021-02-22 12:15   ` Roger Pau Monné
2021-02-25  8:44   ` Jan Beulich
2021-02-26  7:06     ` Tian, Kevin
2021-02-22 10:57 ` [PATCH v3 2/2] VMX: use a single, global " Jan Beulich
2021-03-01  2:34   ` Tian, Kevin
2021-03-01  8:18     ` Jan Beulich
2021-04-12 10:40 ` [PATCH v4] " Jan Beulich
2021-04-12 15:31   ` Roger Pau Monné
2021-04-13  9:24     ` Jan Beulich
2021-04-13 10:18       ` Roger Pau Monné
2021-04-13 12:03         ` Jan Beulich
2021-04-13 13:03           ` Roger Pau Monné
2021-04-17 19:24   ` Tim Deegan
2021-04-19 11:25     ` Jan Beulich
2021-04-22  7:42       ` Tim Deegan
2021-04-22  9:38         ` Jan Beulich
2021-04-22 15:05           ` Tim Deegan
2021-04-23 10:51 ` [PATCH v4 0/3] VMX APIC access page and shadow adjustments Jan Beulich
2021-04-23 10:52   ` [PATCH v4 1/3] VMX: use a single, global APIC access page Jan Beulich
2021-04-23 14:17     ` Roger Pau Monné
2021-04-23 14:42       ` Jan Beulich
2021-04-26 17:55         ` Tim Deegan
2021-04-25  1:27     ` Tian, Kevin [this message]
2021-04-26 17:53     ` Tim Deegan
2021-04-23 10:53   ` [PATCH v4 2/3] x86/shadow: re-use variables in shadow_get_page_from_l1e() Jan Beulich
2021-04-23 10:54   ` [PATCH v4 3/3] x86/shadow: streamline shadow_get_page_from_l1e() Jan Beulich
2021-04-23 11:00   ` Really v5 (was: [PATCH v4 0/3] VMX APIC access page and shadow adjustments) Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=MWHPR11MB18863486197C197C576A8E9B8C439@MWHPR11MB1886.namprd11.prod.outlook.com \
    --to=kevin.tian@intel.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=george.dunlap@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=jun.nakajima@intel.com \
    --cc=roger.pau@citrix.com \
    --cc=tim@xen.org \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    --subject='RE: [PATCH v4 1/3] VMX: use a single, global APIC access page' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).