All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 16/34] target/arm: Add sve infrastructure for page lookup
Date: Mon, 11 May 2020 14:33:47 +0100	[thread overview]
Message-ID: <20200511133405.5275-17-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200511133405.5275-1-peter.maydell@linaro.org>

From: Richard Henderson <richard.henderson@linaro.org>

For contiguous predicated memory operations, we want to
minimize the number of tlb lookups performed.  We have
open-coded this for sve_ld1_r, but for correctness with
MTE we will need this for all of the memory operations.

Create a structure that holds the bounds of active elements,
and metadata for two pages.  Add routines to find those
active elements, look up the pages, and run watchpoints
for those pages.

Temporarily mark the functions unused to avoid Werror.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200508154359.7494-10-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/sve_helper.c | 263 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 261 insertions(+), 2 deletions(-)

diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index aad2c8c2371..2f053a9152d 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -1630,7 +1630,7 @@ void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc)
     }
 }
 
-/* Big-endian hosts need to frob the byte indicies.  If the copy
+/* Big-endian hosts need to frob the byte indices.  If the copy
  * happens to be 8-byte aligned, then no frobbing necessary.
  */
 static void swap_memmove(void *vd, void *vs, size_t n)
@@ -3974,7 +3974,7 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc)
 /*
  * Load elements into @vd, controlled by @vg, from @host + @mem_ofs.
  * Memory is valid through @host + @mem_max.  The register element
- * indicies are inferred from @mem_ofs, as modified by the types for
+ * indices are inferred from @mem_ofs, as modified by the types for
  * which the helper is built.  Return the @mem_ofs of the first element
  * not loaded (which is @mem_max if they are all loaded).
  *
@@ -4133,6 +4133,265 @@ static intptr_t max_for_page(target_ulong base, intptr_t mem_off,
     return MIN(split, mem_max - mem_off) + mem_off;
 }
 
+/*
+ * Resolve the guest virtual address to info->host and info->flags.
+ * If @nofault, return false if the page is invalid, otherwise
+ * exit via page fault exception.
+ */
+
+typedef struct {
+    void *host;         /* host address corresponding to the guest addr */
+    int flags;          /* value returned by probe_access_flags */
+    MemTxAttrs attrs;   /* zeroed for user-only, else from the iotlb entry */
+} SVEHostPage;
+
+static bool sve_probe_page(SVEHostPage *info, bool nofault,
+                           CPUARMState *env, target_ulong addr,
+                           int mem_off, MMUAccessType access_type,
+                           int mmu_idx, uintptr_t retaddr)
+{
+    int flags;
+
+    addr += mem_off;    /* from here on, addr is the byte being probed */
+    flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault,
+                               &info->host, retaddr);
+    info->flags = flags;    /* stored even when the page is invalid */
+
+    if (flags & TLB_INVALID_MASK) {
+        g_assert(nofault);  /* invalid is only expected if @nofault */
+        return false;       /* info->host and info->attrs not updated below */
+    }
+
+    /* Rebase info->host so it is relative to the caller's addr, not addr + mem_off. */
+    info->host -= mem_off;
+
+#ifdef CONFIG_USER_ONLY
+    memset(&info->attrs, 0, sizeof(info->attrs));
+#else
+    /*
+     * Find the iotlbentry for addr (which now includes mem_off) and return
+     * the transaction attributes.  It *must* be in the TLB: we just found it.
+     */
+    {
+        uintptr_t index = tlb_index(env, mmu_idx, addr);
+
+# ifdef CONFIG_DEBUG_TCG
+        CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+        target_ulong comparator = (access_type == MMU_DATA_LOAD
+                                   ? entry->addr_read
+                                   : tlb_addr_write(entry));
+        g_assert(tlb_hit(comparator, addr));
+# endif
+
+        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+        info->attrs = iotlbentry->attrs;
+    }
+#endif
+
+    return true;
+}
+
+
+/*
+ * Analyse contiguous data, protected by a governing predicate.
+ */
+
+typedef enum {
+    FAULT_NO,       /* no-fault: the first page is probed with nofault */
+    FAULT_FIRST,    /* first-fault: only the first active element may fault */
+    FAULT_ALL,      /* second page may fault when no element is split */
+} SVEContFault;
+
+typedef struct {
+    /*
+     * First and last element wholly contained within the two pages.
+     * mem_off_first[0] and reg_off_first[0] are >= 0 if any element is active.
+     * reg_off_last[0] may be < 0 if the first element crosses pages.
+     * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1]
+     * are set >= 0 only if there are complete elements on a second page.
+     *
+     * The reg_off_* offsets are relative to the internal vector register.
+     * The mem_off_first offset is relative to the memory address; the
+     * two offsets are different when a load operation extends, a store
+     * operation truncates, or for multi-register operations.
+     */
+    int16_t mem_off_first[2];   /* -1 while unset */
+    int16_t reg_off_first[2];   /* -1 while unset */
+    int16_t reg_off_last[2];    /* -1 while unset */
+
+    /*
+     * One element that is misaligned and spans both pages,
+     * or -1 if there is no such active element.
+     */
+    int16_t mem_off_split;
+    int16_t reg_off_split;
+
+    /*
+     * The byte offset at which the entire operation crosses a page boundary.
+     * Set >= 0 if and only if the entire operation spans two pages.
+     */
+    int16_t page_split;
+
+    /* TLB data for the two pages; zeroed by sve_cont_ldst_elements. */
+    SVEHostPage page[2];
+} SVEContLdSt;
+
+/*
+ * Fill @info with the first active element and a loose last-element bound
+ * for each page, plus any single active element that spans the page
+ * boundary.  Return true if there are any active elements in @vg.
+ */
+static bool __attribute__((unused))
+sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg,
+                       intptr_t reg_max, int esz, int msize)
+{
+    const int esize = 1 << esz;     /* reg_off stride of one element */
+    const uint64_t pg_mask = pred_esz_masks[esz];
+    intptr_t reg_off_first = -1, reg_off_last = -1, reg_off_split;
+    intptr_t mem_off_last, mem_off_split;
+    intptr_t page_split, elt_split;
+    intptr_t i;
+
+    /* Set all of the element indices to -1, and the TLB data to 0. */
+    memset(info, -1, offsetof(SVEContLdSt, page));
+    memset(info->page, 0, sizeof(info->page));
+
+    /* Coarse scan over the entire predicate for first/last active offsets. */
+    i = 0;
+    do {
+        uint64_t pg = vg[i] & pg_mask;
+        if (pg) {
+            reg_off_last = i * 64 + 63 - clz64(pg);
+            if (reg_off_first < 0) {
+                reg_off_first = i * 64 + ctz64(pg);
+            }
+        }
+    } while (++i * 64 < reg_max);   /* each vg word covers 64 reg offsets */
+
+    if (unlikely(reg_off_first < 0)) {
+        /* No active elements, no pages touched; @info keeps its -1/0 fill. */
+        return false;
+    }
+    tcg_debug_assert(reg_off_last >= 0 && reg_off_last < reg_max);
+
+    info->reg_off_first[0] = reg_off_first;
+    info->mem_off_first[0] = (reg_off_first >> esz) * msize;
+    mem_off_last = (reg_off_last >> esz) * msize;   /* element index * msize */
+
+    page_split = -(addr | TARGET_PAGE_MASK);    /* offset of next page start */
+    if (likely(mem_off_last + msize <= page_split)) {
+        /* The entire operation fits within a single page. */
+        info->reg_off_last[0] = reg_off_last;
+        return true;
+    }
+
+    info->page_split = page_split;
+    elt_split = page_split / msize;     /* first element not wholly on page 0 */
+    reg_off_split = elt_split << esz;
+    mem_off_split = elt_split * msize;
+
+    /*
+     * This is the last full element on the first page, but it is not
+     * necessarily active.  If there is no full element, i.e. the first
+     * active element is the one that's split, this value remains -1.
+     * It is useful as iteration bounds.
+     */
+    if (elt_split != 0) {
+        info->reg_off_last[0] = reg_off_split - esize;
+    }
+
+    /* Determine if an unaligned element spans the pages.  */
+    if (page_split % msize != 0) {
+        /* Record the split element only if its predicate bit is set. */
+        if ((vg[reg_off_split >> 6] >> (reg_off_split & 63)) & 1) {
+            info->reg_off_split = reg_off_split;
+            info->mem_off_split = mem_off_split;
+
+            if (reg_off_split == reg_off_last) {
+                /* The page crossing element is last. */
+                return true;
+            }
+        }
+        reg_off_split += esize;     /* step past the straddling element */
+        mem_off_split += msize;
+    }
+
+    /*
+     * We do want the first active element on the second page, because
+     * this may affect the address reported in an exception.
+     */
+    reg_off_split = find_next_active(vg, reg_off_split, reg_max, esz);
+    tcg_debug_assert(reg_off_split <= reg_off_last);
+    info->reg_off_first[1] = reg_off_split;
+    info->mem_off_first[1] = (reg_off_split >> esz) * msize;
+    info->reg_off_last[1] = reg_off_last;
+    return true;
+}
+
+/*
+ * Resolve the guest virtual addresses to info->page[].
+ * Control the generation of page faults with @fault.  Return false if
+ * there is no work to do, which can only happen with @fault == FAULT_NO.
+ */
+static bool __attribute__((unused))
+sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, CPUARMState *env,
+                    target_ulong addr, MMUAccessType access_type,
+                    uintptr_t retaddr)
+{
+    int mmu_idx = cpu_mmu_index(env, false);
+    int mem_off = info->mem_off_first[0];   /* first active element */
+    bool nofault = fault == FAULT_NO;
+    bool have_work = true;
+
+    if (!sve_probe_page(&info->page[0], nofault, env, addr, mem_off,
+                        access_type, mmu_idx, retaddr)) {
+        /* No work to be done. */
+        return false;
+    }
+
+    if (likely(info->page_split < 0)) {
+        /* The entire operation was on the one page. */
+        return true;
+    }
+
+    /*
+     * If the second page is invalid, then we want the fault address to be
+     * the first byte on that page which is accessed.
+     */
+    if (info->mem_off_split >= 0) {
+        /*
+         * There is an element split across the pages.  The fault address
+         * should be the first byte of the second page.
+         */
+        mem_off = info->page_split;
+        /*
+         * Intended: when the split element is the first active element,
+         * first-fault still faults on page 2; no-fault has work only if
+         * page 2 is valid.  NOTE(review): the test below instead matches an
+         * earlier active element, and stores an enum into a bool -- confirm.
+         */
+        if (info->mem_off_first[0] < info->mem_off_split) {
+            nofault = FAULT_FIRST;
+            have_work = false;
+        }
+    } else {
+        /*
+         * There is no element split across the pages.  The fault address
+         * should be the first active element on the second page.
+         */
+        mem_off = info->mem_off_first[1];
+        /*
+         * There must have been one active element on the first page,
+         * so we're out of first-fault territory.
+         */
+        nofault = fault != FAULT_ALL;
+    }
+
+    have_work |= sve_probe_page(&info->page[1], nofault, env, addr, mem_off,
+                                access_type, mmu_idx, retaddr);
+    return have_work;
+}
+
 /*
  * The result of tlb_vaddr_to_host for user-only is just g2h(x),
  * which is always non-null.  Elide the useless test.
-- 
2.20.1



  parent reply	other threads:[~2020-05-11 13:45 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-11 13:33 [PULL 00/34] target-arm queue Peter Maydell
2020-05-11 13:33 ` [PULL 01/34] aspeed: Add boot stub for smp booting Peter Maydell
2020-05-11 13:33 ` [PULL 02/34] target/arm: Drop access_el3_aa32ns_aa64any() Peter Maydell
2020-05-11 13:33 ` [PULL 03/34] aspeed: Support AST2600A1 silicon revision Peter Maydell
2020-05-11 13:33 ` [PULL 04/34] aspeed: sdmc: Implement AST2600 locking behaviour Peter Maydell
2020-05-11 13:33 ` [PULL 05/34] hw/arm/nrf51: Add NRF51_PERIPHERAL_SIZE definition Peter Maydell
2020-05-11 13:33 ` [PULL 06/34] hw/timer/nrf51_timer: Display timer ID in trace events Peter Maydell
2020-05-11 13:33 ` [PULL 07/34] hw/timer/nrf51_timer: Add trace event of counter value update Peter Maydell
2020-05-11 13:33 ` [PULL 08/34] exec: Add block comments for watchpoint routines Peter Maydell
2020-05-11 13:33 ` [PULL 09/34] exec: Fix cpu_watchpoint_address_matches address length Peter Maydell
2020-05-11 13:33 ` [PULL 10/34] accel/tcg: Add block comment for probe_access Peter Maydell
2020-05-11 13:33 ` [PULL 11/34] accel/tcg: Adjust probe_access call to page_check_range Peter Maydell
2020-05-11 13:33 ` [PULL 12/34] accel/tcg: Add probe_access_flags Peter Maydell
2020-05-11 13:33 ` [PULL 13/34] accel/tcg: Add endian-specific cpu_{ld, st}* operations Peter Maydell
2020-05-11 13:33 ` [PULL 14/34] target/arm: Use cpu_*_data_ra for sve_ldst_tlb_fn Peter Maydell
2020-05-11 13:33 ` [PULL 15/34] target/arm: Drop manual handling of set/clear_helper_retaddr Peter Maydell
2020-05-11 13:33 ` Peter Maydell [this message]
2020-05-11 13:33 ` [PULL 17/34] target/arm: Adjust interface of sve_ld1_host_fn Peter Maydell
2020-05-11 13:33 ` [PULL 18/34] target/arm: Use SVEContLdSt in sve_ld1_r Peter Maydell
2020-05-11 13:33 ` [PULL 19/34] target/arm: Handle watchpoints " Peter Maydell
2020-05-11 13:33 ` [PULL 20/34] target/arm: Use SVEContLdSt for multi-register contiguous loads Peter Maydell
2020-05-11 13:33 ` [PULL 21/34] target/arm: Update contiguous first-fault and no-fault loads Peter Maydell
2020-05-11 13:33 ` [PULL 22/34] target/arm: Use SVEContLdSt for contiguous stores Peter Maydell
2020-05-11 13:33 ` [PULL 23/34] target/arm: Reuse sve_probe_page for gather first-fault loads Peter Maydell
2020-05-11 13:33 ` [PULL 24/34] target/arm: Reuse sve_probe_page for scatter stores Peter Maydell
2020-05-11 13:33 ` [PULL 25/34] target/arm: Reuse sve_probe_page for gather loads Peter Maydell
2020-05-11 13:33 ` [PULL 26/34] target/arm: Remove sve_memopidx Peter Maydell
2020-05-11 13:33 ` [PULL 27/34] target/arm/kvm: Inline set_feature() calls Peter Maydell
2020-05-11 13:33 ` [PULL 28/34] target/arm: Make set_feature() available for other files Peter Maydell
2020-05-11 13:34 ` [PULL 29/34] target/arm/cpu: Use ARRAY_SIZE() to iterate over ARMCPUInfo[] Peter Maydell
2020-05-11 13:34 ` [PULL 30/34] target/arm/cpu: Restrict v8M IDAU interface to Aarch32 CPUs Peter Maydell
2020-05-11 13:34 ` [PULL 31/34] target/arm: Restrict TCG cpus to TCG accel Peter Maydell
2020-05-11 13:34 ` [PULL 32/34] hw/arm/musicpal: Map the UART devices unconditionally Peter Maydell
2020-05-11 13:34 ` [PULL 33/34] target/arm: Use tcg_gen_gvec_5_ptr for sve FMLA/FCMLA Peter Maydell
2020-05-11 13:34 ` [PULL 34/34] target/arm: Fix tcg_gen_gvec_dup_imm vs DUP (indexed) Peter Maydell
2020-05-11 14:03 ` [PULL 00/34] target-arm queue Peter Maydell
2020-05-11 14:25   ` Peter Maydell
2020-05-11 15:17 ` no-reply
2020-05-11 15:44 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200511133405.5275-17-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.