From: "Tian, Kevin" <kevin.tian@intel.com>
To: David Vrabel <david.vrabel@citrix.com>,
	"xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
	Jan Beulich <jbeulich@suse.com>,
	"Nakajima, Jun" <jun.nakajima@intel.com>
Subject: Re: [PATCHv7] x86/ept: defer the invalidation until the p2m lock is released
Date: Wed, 3 Feb 2016 03:44:57 +0000	[thread overview]
Message-ID: <AADFC41AFE54684AB9EE6CBC0274A5D15F79BF2C@SHSMSX103.ccr.corp.intel.com> (raw)
In-Reply-To: <1454343998-26294-2-git-send-email-david.vrabel@citrix.com>

> From: David Vrabel [mailto:david.vrabel@citrix.com]
> Sent: Tuesday, February 02, 2016 12:27 AM
> 
> Holding the p2m lock while calling ept_sync_domain() is very expensive
> since it does a on_selected_cpus() call.  IPIs on many socket machines
> can be very slows and on_selected_cpus() is serialized.

slows -> slow

> 
> It is safe to defer the invalidate until the p2m lock is released
> except for two cases:
> 
> 1. When freeing a page table page (since partial translations may be
>    cached).
> 2. When reclaiming a zero page as part of PoD.
> 
> For these cases, add p2m_tlb_flush_sync() calls which will immediately
> perform the invalidate before the page is freed or reclaimed.
> 
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
[...]
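
For readers skimming the patch, the intended flow is roughly as follows (a
paraphrased sketch, not one of the actual hunks; the real defer_flush/need_flush
bookkeeping lives in parts of the patch not quoted here):

/* Paraphrased sketch of the deferral, not the literal patch code. */
p2m->defer_flush = 1;             /* set while the p2m write lock is held */
/* ... p2m updates; ept_sync_domain() now only records p2m->need_flush ... */
p2m_tlb_flush_and_unlock(p2m);    /* drop the lock first, then issue the IPIs */
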
> diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
> index c094320..43c7f1b 100644
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -263,6 +263,7 @@ static void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int l
>          unmap_domain_page(epte);
>      }
> 
> +    p2m_tlb_flush_sync(p2m);
>      p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn));
>  }
> 
> @@ -1095,15 +1096,10 @@ static void __ept_sync_domain(void *info)
>       */
>  }
> 
> -void ept_sync_domain(struct p2m_domain *p2m)
> +static void ept_sync_domain_prepare(struct p2m_domain *p2m)
>  {
>      struct domain *d = p2m->domain;
>      struct ept_data *ept = &p2m->ept;
> -    /* Only if using EPT and this domain has some VCPUs to dirty. */
> -    if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] )
> -        return;
> -
> -    ASSERT(local_irq_is_enabled());
> 
>      if ( nestedhvm_enabled(d) && !p2m_is_nestedp2m(p2m) )
>          p2m_flush_nestedp2m(d);

Should we similarly postpone the nestedp2m flush? It also incurs an
on_selected_cpus() call while the p2m lock is held.
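
Purely to illustrate the question, inside ept_sync_domain_prepare() that could
look something like the fragment below; need_nested_flush is an invented flag
(not in this patch), and the deferred nested flush would still have to be
issued from the unlock path somewhere:

/* Hypothetical sketch only -- mirror the deferral for the nested-p2m flush.
 * need_nested_flush is made up here for illustration. */
if ( nestedhvm_enabled(d) && !p2m_is_nestedp2m(p2m) )
{
    if ( p2m->defer_flush )
        p2m->need_nested_flush = 1;
    else
        p2m_flush_nestedp2m(d);
}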

> @@ -1116,9 +1112,38 @@ void ept_sync_domain(struct p2m_domain *p2m)
>       *    of an EP4TA reuse is still needed.
>       */
>      cpumask_setall(ept->invalidate);
> +}
> +
> +static void ept_sync_domain_mask(struct p2m_domain *p2m, const cpumask_t *mask)
> +{
> +    on_selected_cpus(mask, __ept_sync_domain, p2m, 1);
> +}
> +
> +void ept_sync_domain(struct p2m_domain *p2m)
> +{
> +    struct domain *d = p2m->domain;
> 
> -    on_selected_cpus(d->domain_dirty_cpumask,
> -                     __ept_sync_domain, p2m, 1);
> +    /* Only if using EPT and this domain has some VCPUs to dirty. */
> +    if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] )
> +        return;
> +
> +    ept_sync_domain_prepare(p2m);
> +
> +    if ( p2m->defer_flush )
> +    {
> +        p2m->need_flush = 1;
> +        return;
> +    }
> +
> +    ept_sync_domain_mask(p2m, d->domain_dirty_cpumask);
> +}
> +
> +static void ept_flush_and_unlock(struct p2m_domain *p2m, bool_t unlock)
> +{
> +    p2m->need_flush = 0;
> +    if ( unlock )
> +        mm_write_unlock(&p2m->lock);
> +    ept_sync_domain_mask(p2m, p2m->domain->domain_dirty_cpumask);
>  }
> 
>  static void ept_enable_pml(struct p2m_domain *p2m)
> @@ -1169,6 +1194,7 @@ int ept_p2m_init(struct p2m_domain *p2m)
>      p2m->change_entry_type_range = ept_change_entry_type_range;
>      p2m->memory_type_changed = ept_memory_type_changed;
>      p2m->audit_p2m = NULL;
> +    p2m->flush_and_unlock = ept_flush_and_unlock;
> 
>      /* Set the memory type used when accessing EPT paging structures. */
>      ept->ept_mt = EPT_DEFAULT_MT;
> diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
> index ea16d3e..35835d1 100644
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -626,6 +626,7 @@ p2m_pod_decrease_reservation(struct domain *d,
> 
>              p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), cur_order,
>                            p2m_invalid, p2m->default_access);
> +            p2m_tlb_flush_sync(p2m);
>              for ( j = 0; j < n; ++j )
>                  set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
>              p2m_pod_cache_add(p2m, page, cur_order);
> @@ -755,6 +756,7 @@ p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn)
>      /* Try to remove the page, restoring old mapping if it fails. */
>      p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_2M,
>                    p2m_populate_on_demand, p2m->default_access);
> +    p2m_tlb_flush_sync(p2m);
> 
>      /* Make none of the MFNs are used elsewhere... for example, mapped
>       * via the grant table interface, or by qemu.  Allow one refcount for
> @@ -886,6 +888,8 @@ p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count)
>          }
>      }
> 
> +    p2m_tlb_flush_sync(p2m);
> +
>      /* Now check each page for real */
>      for ( i=0; i < count; i++ )
>      {
> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
> index a45ee35..36a8fb7 100644
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -325,6 +325,25 @@ void p2m_flush_hardware_cached_dirty(struct domain *d)
>      }
>  }
> 
> +/*
> + * Force a synchronous P2M TLB flush if a deferred flush is pending.
> + *
> + * Must be called with the p2m lock held.
> + */
> +void p2m_tlb_flush_sync(struct p2m_domain *p2m)
> +{
> +    if ( p2m->need_flush )
> +        p2m->flush_and_unlock(p2m, 0);
> +}
> +
> +void p2m_tlb_flush_and_unlock(struct p2m_domain *p2m)
> +{
> +    if ( p2m->need_flush )
> +        p2m->flush_and_unlock(p2m, 1);
> +    else
> +        mm_write_unlock(&p2m->lock);
> +}

I'd prefer to move the common logic into this function; then you only need
to keep a flush() callback, e.g.:

void p2m_tlb_flush_and_unlock(struct p2m_domain *p2m)
{
    if ( p2m->need_flush )
    {
        p2m->need_flush = 0;
        mm_write_unlock(&p2m->lock);
        p2m->flush(p2m);
    }
    else
        mm_write_unlock(&p2m->lock);
}

Same for p2m_tlb_flush_sync.
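
i.e. roughly the following for the sync side (a sketch of the same idea,
assuming the flush() callback above; the lock stays held here since callers
rely on that):

void p2m_tlb_flush_sync(struct p2m_domain *p2m)
{
    if ( p2m->need_flush )
    {
        p2m->need_flush = 0;
        /* The flush is issued with the p2m lock still held. */
        p2m->flush(p2m);
    }
}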

Thanks
Kevin

Thread overview: 4+ messages
2016-02-01 16:26 [PATCHv7 0/1] x86/ept: reduce translation invalidation impact David Vrabel
2016-02-01 16:26 ` [PATCHv7] x86/ept: defer the invalidation until the p2m lock is released David Vrabel
2016-02-03  3:44   ` Tian, Kevin [this message]
2016-04-12 13:08     ` David Vrabel
