All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wei Liu <wl@xen.org>
To: Xen Development List <xen-devel@lists.xenproject.org>
Cc: "Wei Liu" <liuwe@microsoft.com>, "Wei Liu" <wl@xen.org>,
	"Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Paul Durrant" <pdurrant@amazon.com>,
	"Michael Kelley" <mikelley@microsoft.com>,
	"Jan Beulich" <jbeulich@suse.com>,
	"Roger Pau Monné" <roger.pau@citrix.com>
Subject: [Xen-devel] [PATCH v4 3/3] x86/hyperv: L0 assisted TLB flush
Date: Wed, 19 Feb 2020 11:44:11 +0000	[thread overview]
Message-ID: <20200219114411.26922-4-liuwe@microsoft.com> (raw)
In-Reply-To: <20200219114411.26922-1-liuwe@microsoft.com>

Implement L0 assisted TLB flush for Xen on Hyper-V. It takes advantage
of several hypercalls:

 * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST
 * HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX
 * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE
 * HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX

Pick the most efficient hypercalls available.

Signed-off-by: Wei Liu <liuwe@microsoft.com>
---
v4:
1. Fix bank mask generation.
2. Fix page order calculation.
3. Remove types.h from private.h.
4. Add a note about NMI and #MC handling.

v3:
1. Address more comments.
2. Fix usage of max_vp_index.
3. Use the fill_gva_list algorithm from Linux.

v2:
1. Address Roger and Jan's comments re types etc.
2. Fix pointer arithmetic.
3. Misc improvement to code.
---
 xen/arch/x86/guest/hyperv/Makefile  |   1 +
 xen/arch/x86/guest/hyperv/private.h |   8 ++
 xen/arch/x86/guest/hyperv/tlb.c     | 175 +++++++++++++++++++++++++++-
 xen/arch/x86/guest/hyperv/util.c    |  75 ++++++++++++
 4 files changed, 258 insertions(+), 1 deletion(-)
 create mode 100644 xen/arch/x86/guest/hyperv/util.c

diff --git a/xen/arch/x86/guest/hyperv/Makefile b/xen/arch/x86/guest/hyperv/Makefile
index 18902c33e9..0e39410968 100644
--- a/xen/arch/x86/guest/hyperv/Makefile
+++ b/xen/arch/x86/guest/hyperv/Makefile
@@ -1,2 +1,3 @@
 obj-y += hyperv.o
 obj-y += tlb.o
+obj-y += util.o
diff --git a/xen/arch/x86/guest/hyperv/private.h b/xen/arch/x86/guest/hyperv/private.h
index 509bedaafa..354fc7f685 100644
--- a/xen/arch/x86/guest/hyperv/private.h
+++ b/xen/arch/x86/guest/hyperv/private.h
@@ -29,7 +29,15 @@ DECLARE_PER_CPU(void *, hv_input_page);
 DECLARE_PER_CPU(void *, hv_vp_assist);
 DECLARE_PER_CPU(unsigned int, hv_vp_index);
 
+/*
+ * Return the value of the per-CPU hv_vp_index variable for Xen CPU @cpu,
+ * i.e. the Hyper-V virtual processor index associated with that CPU.
+ */
+static inline unsigned int hv_vp_index(unsigned int cpu)
+{
+    unsigned int vp_index = per_cpu(hv_vp_index, cpu);
+
+    return vp_index;
+}
+
 int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
                      unsigned int flags);
 
+/* Returns the number of banks, or a negative errno value on error */
+int cpumask_to_vpset(struct hv_vpset *vpset, const cpumask_t *mask);
+
 #endif /* __XEN_HYPERV_PRIVIATE_H__  */
diff --git a/xen/arch/x86/guest/hyperv/tlb.c b/xen/arch/x86/guest/hyperv/tlb.c
index 48f527229e..1d723d6ee6 100644
--- a/xen/arch/x86/guest/hyperv/tlb.c
+++ b/xen/arch/x86/guest/hyperv/tlb.c
@@ -19,17 +19,190 @@
  * Copyright (c) 2020 Microsoft.
  */
 
+#include <xen/cpu.h>
 #include <xen/cpumask.h>
 #include <xen/errno.h>
 
+#include <asm/guest/hyperv.h>
+#include <asm/guest/hyperv-hcall.h>
+#include <asm/guest/hyperv-tlfs.h>
+
 #include "private.h"
 
+/*
+ * It is possible to encode up to 4096 pages using the lower 12 bits
+ * in an element of gva_list
+ */
+#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
+
+/*
+ * Populate gva_list with entries describing the 2^order pages that start
+ * at va.
+ *
+ * Every entry carries a page-aligned address in its upper bits and, in
+ * the bits below PAGE_SHIFT, the number of additional pages to flush
+ * after that address.  One entry therefore covers at most
+ * HV_TLB_FLUSH_UNIT bytes.
+ *
+ * Returns the number of entries written.  At least one entry is always
+ * written; the caller has to make sure gva_list is large enough.
+ */
+static unsigned int fill_gva_list(uint64_t *gva_list, const void *va,
+                                  unsigned int order)
+{
+    unsigned long addr = (unsigned long)va;
+    /* First byte beyond the range to be flushed */
+    const unsigned long limit = addr + (PAGE_SIZE << order);
+    unsigned int count = 0;
+
+    for ( ; ; )
+    {
+        unsigned long remaining = limit - addr;
+        uint64_t entry = addr & PAGE_MASK;
+
+        if ( remaining < HV_TLB_FLUSH_UNIT )
+        {
+            /* Last chunk: record how many pages follow the first one */
+            entry |= (remaining - 1) >> PAGE_SHIFT;
+            addr = limit;
+        }
+        else
+        {
+            /* Full chunk: all low bits set, i.e. the maximum page count */
+            entry |= ~PAGE_MASK;
+            addr += HV_TLB_FLUSH_UNIT;
+        }
+
+        gva_list[count++] = entry;
+
+        if ( addr >= limit )
+            break;
+    }
+
+    return count;
+}
+
+/*
+ * Issue a TLB flush through the "_EX" hypercalls, which describe the
+ * target processors with a VP set (see cpumask_to_vpset()) instead of a
+ * single 64-bit processor mask.
+ *
+ * The request is assembled in this CPU's hypercall input page and the
+ * function expects to be entered with interrupts disabled (asserted
+ * below).
+ *
+ * Returns the hypercall status, or ~0ULL when the request cannot be built
+ * or the extended processor masks are not recommended by the hypervisor.
+ */
+static uint64_t flush_tlb_ex(const cpumask_t *mask, const void *va,
+                             unsigned int flags)
+{
+    struct hv_tlb_flush_ex *flush = this_cpu(hv_input_page);
+    unsigned int order = (flags - 1) & FLUSH_ORDER_MASK;
+    unsigned int max_gvas;
+    unsigned long hdr_bytes;
+    int nr_banks;
+
+    if ( !flush || local_irq_is_enabled() )
+    {
+        ASSERT_UNREACHABLE();
+        return ~0ULL;
+    }
+
+    if ( !(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) )
+        return ~0ULL;
+
+    flush->address_space = 0;
+    flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+    if ( !(flags & FLUSH_TLB_GLOBAL) )
+        flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+
+    nr_banks = cpumask_to_vpset(&flush->hv_vp_set, mask);
+    if ( nr_banks < 0 )
+        return ~0ULL;
+
+    /*
+     * The input page holds the fixed part of the structure, then the
+     * banks of the VP set, then the gva list.  Whatever is left after
+     * the first two is available for gvas (each one is a uint64_t).
+     */
+    hdr_bytes = sizeof(*flush) +
+                nr_banks * sizeof(flush->hv_vp_set.bank_contents[0]);
+    max_gvas = (PAGE_SIZE - hdr_bytes) / sizeof(uint64_t);
+
+    /* Flush individual addresses only if va is set and the list fits */
+    if ( va && (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT <= max_gvas )
+    {
+        uint64_t *gva_list;
+        unsigned int nr_gvas;
+
+        /*
+         * The calculation of gva_list address requires the structure to
+         * be 64 bits aligned.
+         */
+        BUILD_BUG_ON(sizeof(*flush) % sizeof(uint64_t));
+        gva_list = (uint64_t *)flush + sizeof(*flush) / sizeof(uint64_t) +
+                   nr_banks;
+        nr_gvas = fill_gva_list(gva_list, va, order);
+
+        return hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
+                                   nr_gvas, nr_banks,
+                                   virt_to_maddr(flush), 0);
+    }
+
+    /* Otherwise flush the entire address space */
+    return hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, 0,
+                               nr_banks, virt_to_maddr(flush), 0);
+}
+
+/* Maximum number of gvas for hv_tlb_flush */
+#define MAX_GVAS ((PAGE_SIZE - sizeof(struct hv_tlb_flush)) / sizeof(uint64_t))
+
 int hyperv_flush_tlb(const cpumask_t *mask, const void *va,
                      unsigned int flags)
 {
-    return -EOPNOTSUPP;
+    unsigned long irq_flags;
+    struct hv_tlb_flush *flush = this_cpu(hv_input_page);
+    unsigned int order = (flags - 1) & FLUSH_ORDER_MASK;
+    uint64_t ret;
+
+    if ( !flush || cpumask_empty(mask) )
+    {
+        ASSERT_UNREACHABLE();
+        return -EINVAL;
+    }
+
+    /* TODO: may need to check if in #NMI or #MC and fallback to native path */
+
+    local_irq_save(irq_flags);
+
+    flush->address_space = 0;
+    flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+    flush->processor_mask = 0;
+    if ( !(flags & FLUSH_TLB_GLOBAL) )
+        flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+
+    if ( cpumask_equal(mask, &cpu_online_map) )
+        flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+    else
+    {
+        unsigned int cpu;
+
+        /*
+         * Normally VP indices are in ascending order and match Xen's
+         * idea of CPU ids. Check the last index to see if VP index is
+         * >= 64. If so, we can skip setting up parameters for
+         * non-applicable hypercalls without looking further.
+         */
+        if ( hv_vp_index(cpumask_last(mask)) >= 64 )
+            goto do_ex_hypercall;
+
+        for_each_cpu ( cpu, mask )
+        {
+            unsigned int vpid = hv_vp_index(cpu);
+
+            if ( vpid >= ms_hyperv.max_vp_index )
+            {
+                local_irq_restore(irq_flags);
+                return -ENXIO;
+            }
+
+            if ( vpid >= 64 )
+                goto do_ex_hypercall;
+
+            __set_bit(vpid, &flush->processor_mask);
+        }
+    }
+
+    /*
+     * Flush the entire address space if va is NULL or if there is not
+     * enough space for gva_list.
+     */
+    if ( !va || (PAGE_SIZE << order) / HV_TLB_FLUSH_UNIT > MAX_GVAS )
+        ret = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+                              virt_to_maddr(flush), 0);
+    else
+        ret = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
+                                  fill_gva_list(flush->gva_list, va, order),
+                                  0, virt_to_maddr(flush), 0);
+    goto done;
+
+ do_ex_hypercall:
+    ret = flush_tlb_ex(mask, va, flags);
+
+ done:
+    local_irq_restore(irq_flags);
+
+    return ret & HV_HYPERCALL_RESULT_MASK ? -ENXIO : 0;
 }
 
+#undef MAX_GVAS
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/guest/hyperv/util.c b/xen/arch/x86/guest/hyperv/util.c
new file mode 100644
index 0000000000..bec61c2afd
--- /dev/null
+++ b/xen/arch/x86/guest/hyperv/util.c
@@ -0,0 +1,75 @@
+/******************************************************************************
+ * arch/x86/guest/hyperv/util.c
+ *
+ * Hyper-V utility functions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2020 Microsoft.
+ */
+
+#include <xen/cpu.h>
+#include <xen/cpumask.h>
+#include <xen/errno.h>
+
+#include <asm/guest/hyperv.h>
+#include <asm/guest/hyperv-tlfs.h>
+
+#include "private.h"
+
+/*
+ * Convert the Xen CPU mask @mask into the Hyper-V VP set @vpset, using
+ * the HV_GENERIC_SET_SPARSE_4K format: one 64-bit bank per group of 64
+ * VP indices, plus a mask telling which banks are present.
+ *
+ * Returns the number of banks described by valid_bank_mask (at least 1),
+ * or a negative errno value:
+ *   -E2BIG  more banks would be needed than valid_bank_mask can describe;
+ *   -ENXIO  a CPU in @mask has a VP index outside the range implied by
+ *           ms_hyperv.max_vp_index.
+ */
+int cpumask_to_vpset(struct hv_vpset *vpset,
+                     const cpumask_t *mask)
+{
+    int nr = 1;
+    unsigned int cpu, vcpu_bank, vcpu_offset;
+    /*
+     * Round up: a partially populated last bank must be counted (and
+     * cleared) as well.  max_vp_index is treated as an exclusive bound,
+     * matching the "vpid >= ms_hyperv.max_vp_index" check done by
+     * hyperv_flush_tlb().
+     */
+    unsigned int max_banks = ms_hyperv.max_vp_index / 64 +
+                             !!(ms_hyperv.max_vp_index % 64);
+
+    /* Up to 64 banks can be represented by valid_bank_mask */
+    if ( max_banks > 64 )
+        return -E2BIG;
+
+    /* Clear all banks to avoid flushing unwanted CPUs */
+    for ( vcpu_bank = 0; vcpu_bank < max_banks; vcpu_bank++ )
+        vpset->bank_contents[vcpu_bank] = 0;
+
+    vpset->format = HV_GENERIC_SET_SPARSE_4K;
+
+    for_each_cpu ( cpu, mask )
+    {
+        unsigned int vcpu = hv_vp_index(cpu);
+
+        vcpu_bank = vcpu / 64;
+        vcpu_offset = vcpu % 64;
+
+        /*
+         * Never touch a bank that was not cleared above: it may hold
+         * stale data and may lie beyond the space reserved for banks.
+         */
+        if ( vcpu_bank >= max_banks )
+            return -ENXIO;
+
+        __set_bit(vcpu_offset, &vpset->bank_contents[vcpu_bank]);
+
+        if ( vcpu_bank >= nr )
+            nr = vcpu_bank + 1;
+    }
+
+    /*
+     * Banks 0 .. nr-1 are reported as valid.  Some of them may be empty
+     * but that's ok.  nr is in [1, 64], so the shift count is in [0, 63].
+     */
+    vpset->valid_bank_mask = ~0ULL >> (64 - nr);
+
+    return nr;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
2.20.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply	other threads:[~2020-02-19 11:44 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-19 11:44 [Xen-devel] [PATCH v4 0/3] Xen on Hyper-V: Implement L0 assisted TLB flush Wei Liu
2020-02-19 11:44 ` [Xen-devel] [PATCH v4 1/3] x86/hypervisor: pass flags to hypervisor_flush_tlb Wei Liu
2020-03-09 16:38   ` Jan Beulich
2020-03-09 17:25     ` Wei Liu
2020-03-10  9:58       ` Jan Beulich
2020-03-10 13:39         ` Wei Liu
2020-02-19 11:44 ` [Xen-devel] [PATCH v4 2/3] x86/hyperv: skeleton for L0 assisted TLB flush Wei Liu
2020-02-19 11:44 ` Wei Liu [this message]
2020-02-19 17:52   ` [Xen-devel] [PATCH v4 3/3] x86/hyperv: " Roger Pau Monné
2020-02-20 10:53   ` Durrant, Paul

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200219114411.26922-4-liuwe@microsoft.com \
    --to=wl@xen.org \
    --cc=andrew.cooper3@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=liuwe@microsoft.com \
    --cc=mikelley@microsoft.com \
    --cc=pdurrant@amazon.com \
    --cc=roger.pau@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.