qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Cornelia Huck <cohuck@redhat.com>
To: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-s390x@nongnu.org,
	Richard Henderson <richard.henderson@linaro.org>,
	qemu-devel@nongnu.org, David Hildenbrand <david@redhat.com>
Subject: [Qemu-devel] [PULL 17/34] s390x/tcg: Fault-safe memmove
Date: Thu, 19 Sep 2019 14:40:58 +0200	[thread overview]
Message-ID: <20190919124115.11510-18-cohuck@redhat.com> (raw)
In-Reply-To: <20190919124115.11510-1-cohuck@redhat.com>

From: David Hildenbrand <david@redhat.com>

Replace fast_memmove() variants by access_memmove() variants, that
first try to probe access to all affected pages (maximum is two pages).

Introduce access_get_byte()/access_set_byte(). We might be able to speed
up memmove in special cases even further (do single-byte access, use
memmove() for remaining bytes in page), however, we'll skip that for now.

In MVCOS, simply always call access_memmove_as() and drop the TODO
about LAP. LAP is already handled in the MMU.

Get rid of adj_len_to_page(), which is now unused.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/mem_helper.c | 232 ++++++++++++++++++++++----------------
 1 file changed, 133 insertions(+), 99 deletions(-)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index dd5da7074610..e50cec9263c2 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -65,17 +65,6 @@ static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
     return dest > src && dest <= src + len - 1;
 }
 
-/* Reduce the length so that addr + len doesn't cross a page boundary.  */
-static inline uint32_t adj_len_to_page(uint32_t len, uint64_t addr)
-{
-#ifndef CONFIG_USER_ONLY
-    if ((addr & ~TARGET_PAGE_MASK) + len - 1 >= TARGET_PAGE_SIZE) {
-        return -(addr | TARGET_PAGE_MASK);
-    }
-#endif
-    return len;
-}
-
 /* Trigger a SPECIFICATION exception if an address or a length is not
    naturally aligned.  */
 static inline void check_alignment(CPUS390XState *env, uint64_t v,
@@ -208,39 +197,110 @@ static void access_memset(CPUS390XState *env, S390Access *desta,
                      desta->mmu_idx, ra);
 }
 
-#ifndef CONFIG_USER_ONLY
-static void fast_memmove_idx(CPUS390XState *env, uint64_t dest, uint64_t src,
-                             uint32_t len, int dest_idx, int src_idx,
-                             uintptr_t ra)
+static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
+                                  int offset, int mmu_idx, uintptr_t ra)
 {
-    TCGMemOpIdx oi_dest = make_memop_idx(MO_UB, dest_idx);
-    TCGMemOpIdx oi_src = make_memop_idx(MO_UB, src_idx);
-    uint32_t len_adj;
-    void *src_p;
-    void *dest_p;
-    uint8_t x;
-
-    while (len > 0) {
-        src = wrap_address(env, src);
-        dest = wrap_address(env, dest);
-        src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, src_idx);
-        dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, dest_idx);
-
-        if (src_p && dest_p) {
-            /* Access to both whole pages granted.  */
-            len_adj = adj_len_to_page(adj_len_to_page(len, src), dest);
-            memmove(dest_p, src_p, len_adj);
-        } else {
-            /* We failed to get access to one or both whole pages. The next
-               read or write access will likely fill the QEMU TLB for the
-               next iteration.  */
-            len_adj = 1;
-            x = helper_ret_ldub_mmu(env, src, oi_src, ra);
-            helper_ret_stb_mmu(env, dest, x, oi_dest, ra);
+#ifdef CONFIG_USER_ONLY
+    return ldub_p(*haddr + offset);
+#else
+    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
+    uint8_t byte;
+
+    if (likely(*haddr)) {
+        return ldub_p(*haddr + offset);
+    }
+    /*
+     * Do a single access and test if we can then get access to the
+     * page. This is especially relevant to speed up TLB_NOTDIRTY.
+     */
+    byte = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra);
+    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
+    return byte;
+#endif
+}
+
+static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
+                               int offset, uintptr_t ra)
+{
+    if (offset < access->size1) {
+        return do_access_get_byte(env, access->vaddr1, &access->haddr1,
+                                  offset, access->mmu_idx, ra);
+    }
+    return do_access_get_byte(env, access->vaddr2, &access->haddr2,
+                              offset - access->size1, access->mmu_idx, ra);
+}
+
+static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
+                               int offset, uint8_t byte, int mmu_idx,
+                               uintptr_t ra)
+{
+#ifdef CONFIG_USER_ONLY
+    stb_p(*haddr + offset, byte);
+#else
+    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
+
+    if (likely(*haddr)) {
+        stb_p(*haddr + offset, byte);
+        return;
+    }
+    /*
+     * Do a single access and test if we can then get access to the
+     * page. This is especially relevant to speed up TLB_NOTDIRTY.
+     */
+    helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra);
+    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
+#endif
+}
+
+static void access_set_byte(CPUS390XState *env, S390Access *access,
+                            int offset, uint8_t byte, uintptr_t ra)
+{
+    if (offset < access->size1) {
+        do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
+                           access->mmu_idx, ra);
+    } else {
+        do_access_set_byte(env, access->vaddr2, &access->haddr2,
+                           offset - access->size1, byte, access->mmu_idx, ra);
+    }
+}
+
+/*
+ * Move data with the same semantics as memmove() in case ranges don't overlap
+ * or src > dest. Undefined behavior on destructive overlaps.
+ */
+static void access_memmove(CPUS390XState *env, S390Access *desta,
+                           S390Access *srca, uintptr_t ra)
+{
+    int diff;
+
+    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);
+
+    /* Fallback to slow access in case we don't have access to all host pages */
+    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
+                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
+        int i;
+
+        for (i = 0; i < desta->size1 + desta->size2; i++) {
+            uint8_t byte = access_get_byte(env, srca, i, ra);
+
+            access_set_byte(env, desta, i, byte, ra);
         }
-        src += len_adj;
-        dest += len_adj;
-        len -= len_adj;
+        return;
+    }
+
+    if (srca->size1 == desta->size1) {
+        memmove(desta->haddr1, srca->haddr1, srca->size1);
+        memmove(desta->haddr2, srca->haddr2, srca->size2);
+    } else if (srca->size1 < desta->size1) {
+        diff = desta->size1 - srca->size1;
+        memmove(desta->haddr1, srca->haddr1, srca->size1);
+        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
+        memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
+    } else {
+        diff = srca->size1 - desta->size1;
+        memmove(desta->haddr1, srca->haddr1, desta->size1);
+        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
+        memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
     }
 }
 
@@ -259,45 +319,6 @@ static int mmu_idx_from_as(uint8_t as)
     }
 }
 
-static void fast_memmove_as(CPUS390XState *env, uint64_t dest, uint64_t src,
-                            uint32_t len, uint8_t dest_as, uint8_t src_as,
-                            uintptr_t ra)
-{
-    int src_idx = mmu_idx_from_as(src_as);
-    int dest_idx = mmu_idx_from_as(dest_as);
-
-    fast_memmove_idx(env, dest, src, len, dest_idx, src_idx, ra);
-}
-#endif
-
-static void fast_memmove(CPUS390XState *env, uint64_t dest, uint64_t src,
-                         uint32_t l, uintptr_t ra)
-{
-    int mmu_idx = cpu_mmu_index(env, false);
-
-    while (l > 0) {
-        void *src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, mmu_idx);
-        void *dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx);
-        if (src_p && dest_p) {
-            /* Access to both whole pages granted.  */
-            uint32_t l_adj = adj_len_to_page(l, src);
-            l_adj = adj_len_to_page(l_adj, dest);
-            memmove(dest_p, src_p, l_adj);
-            src += l_adj;
-            dest += l_adj;
-            l -= l_adj;
-        } else {
-            /* We failed to get access to one or both whole pages. The next
-               read or write access will likely fill the QEMU TLB for the
-               next iteration.  */
-            cpu_stb_data_ra(env, dest, cpu_ldub_data_ra(env, src, ra), ra);
-            src++;
-            dest++;
-            l--;
-        }
-    }
-}
-
 /* and on array */
 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
@@ -388,7 +409,7 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                               uint64_t src, uintptr_t ra)
 {
     const int mmu_idx = cpu_mmu_index(env, false);
-    S390Access desta;
+    S390Access srca, desta;
     uint32_t i;
 
     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
@@ -397,6 +418,7 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
     /* MVC always copies one more byte than specified - maximum is 256 */
     l++;
 
+    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 
     /*
@@ -405,9 +427,9 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
      * behave like memmove().
      */
     if (dest == src + 1) {
-        access_memset(env, &desta, cpu_ldub_data_ra(env, src, ra), ra);
+        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
     } else if (!is_destructive_overlap(env, dest, src, l)) {
-        fast_memmove(env, dest, src, l, ra);
+        access_memmove(env, &desta, &srca, ra);
     } else {
         for (i = 0; i < l; i++) {
             uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
@@ -756,8 +778,11 @@ uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
 /* move page */
 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2)
 {
+    const int mmu_idx = cpu_mmu_index(env, false);
     const bool f = extract64(r0, 11, 1);
     const bool s = extract64(r0, 10, 1);
+    uintptr_t ra = GETPC();
+    S390Access srca, desta;
 
     if ((f && s) || extract64(r0, 12, 4)) {
         s390_program_interrupt(env, PGM_SPECIFICATION, ILEN_AUTO, GETPC());
@@ -772,7 +797,11 @@ uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2)
      * - CC-option with surpression of page-translation exceptions
      * - Store r1/r2 register identifiers at real location 162
      */
-    fast_memmove(env, r1, r2, TARGET_PAGE_SIZE, GETPC());
+    srca = access_prepare(env, r2, TARGET_PAGE_SIZE, MMU_DATA_LOAD, mmu_idx,
+                          ra);
+    desta = access_prepare(env, r1, TARGET_PAGE_SIZE, MMU_DATA_STORE, mmu_idx,
+                           ra);
+    access_memmove(env, &desta, &srca, ra);
     return 0; /* data moved */
 }
 
@@ -853,7 +882,7 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
 {
     const int mmu_idx = cpu_mmu_index(env, false);
     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
-    S390Access desta;
+    S390Access srca, desta;
     int i, cc;
 
     if (*destlen == *srclen) {
@@ -877,7 +906,9 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
         *destlen -= len;
         *srclen -= len;
-        fast_memmove(env, *dest, *src, len, ra);
+        srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
+        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
+        access_memmove(env, &desta, &srca, ra);
         *src = wrap_address(env, *src + len);
         *dest = wrap_address(env, *dest + len);
     } else if (wordsize == 1) {
@@ -911,8 +942,8 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
     uint64_t src = get_address(env, r2);
     uint8_t pad = env->regs[r2 + 1] >> 24;
+    S390Access srca, desta;
     uint32_t cc, cur_len;
-    S390Access desta;
 
     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
         cc = 3;
@@ -946,7 +977,11 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
         } else {
             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
 
-            fast_memmove(env, dest, src, cur_len, ra);
+            srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
+                                  ra);
+            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
+                                   ra);
+            access_memmove(env, &desta, &srca, ra);
             src = wrap_address(env, src + cur_len);
             srclen -= cur_len;
             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
@@ -2488,16 +2523,15 @@ uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
         s390_program_interrupt(env, PGM_ADDRESSING, 6, ra);
     }
 
-    /* FIXME: a) LAP
-     *        b) Access using correct keys
-     *        c) AR-mode
-     */
-#ifdef CONFIG_USER_ONLY
-    /* psw keys are never valid in user mode, we will never reach this */
-    g_assert_not_reached();
-#else
-    fast_memmove_as(env, dest, src, len, dest_as, src_as, ra);
-#endif
+    /* FIXME: Access using correct keys and AR-mode */
+    if (len) {
+        S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
+                                         mmu_idx_from_as(src_as), ra);
+        S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
+                                          mmu_idx_from_as(dest_as), ra);
+
+        access_memmove(env, &desta, &srca, ra);
+    }
 
     return cc;
 }
-- 
2.20.1



  parent reply	other threads:[~2019-09-19 13:09 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-19 12:40 [Qemu-devel] [PULL 00/34] s390x update Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 01/34] s390x/tcg: Reset exception_index to -1 instead of 0 Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 02/34] s390x/tcg: MVCL: Zero out unused bits of address Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 03/34] s390x/tcg: MVCL: Detect destructive overlaps Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 04/34] s390x/tcg: MVCL: Process max 4k bytes at a time Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 05/34] s390x/tcg: MVC: Increment the length once Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 06/34] s390x/tcg: MVC: Use is_destructive_overlap() Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 07/34] s390x/tcg: MVPG: Check for specification exceptions Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 08/34] s390x/tcg: MVPG: Properly wrap the addresses Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 09/34] s390x/tcg: MVCLU/MVCLE: Process max 4k bytes at a time Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 10/34] s390x/tcg: MVCS/MVCP: Check for special operation exceptions Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 11/34] s390x/tcg: MVCOS: Lengths are 32 bit in 24/31-bit mode Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 12/34] s390x/tcg: MVCS/MVCP: Properly wrap the length Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 13/34] s390x/tcg: MVST: Check for specification exceptions Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 14/34] s390x/tcg: MVST: Fix storing back the addresses to registers Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 15/34] s390x/tcg: Always use MMU_USER_IDX for CONFIG_USER_ONLY Cornelia Huck
2019-09-19 12:40 ` [Qemu-devel] [PULL 16/34] s390x/tcg: Fault-safe memset Cornelia Huck
2019-09-19 12:40 ` Cornelia Huck [this message]
2019-09-19 12:40 ` [Qemu-devel] [PULL 18/34] s390x/tcg: MVCS/MVCP: Use access_memmove() Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 19/34] s390x/tcg: MVC: Fault-safe handling on destructive overlaps Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 20/34] s390x/tcg: MVCLU: Fault-safe handling Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 21/34] s390x/tcg: OC: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 22/34] s390x/tcg: XC: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 23/34] s390x/tcg: NC: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 24/34] s390x/tcg: MVCIN: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 25/34] s390x/tcg: MVN: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 26/34] s390x/tcg: MVZ: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 27/34] s390x/tcg: MVST: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 28/34] s390x/tcg: MVO: " Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 29/34] tests/tcg: target/s390x: Test MVO Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 30/34] pc-bios/s390-ccw: Do not pre-initialize empty array Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 31/34] pc-bios/s390-ccw/net: fix a possible memory leak in get_uuid() Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 32/34] pc-bios/s390-ccw: Rebuild the s390-netboot.img firmware image Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 33/34] s390x/kvm: Officially require at least kernel 3.15 Cornelia Huck
2019-09-19 12:41 ` [Qemu-devel] [PULL 34/34] s390x/cpumodel: Add the z15 name to the description of gen15a Cornelia Huck
2019-09-20 10:45 ` [PULL 00/34] s390x update Peter Maydell
2019-09-20 11:00   ` Cornelia Huck
2019-09-20 11:51     ` David Hildenbrand
2019-09-20 11:59       ` David Hildenbrand
2019-09-20 13:32         ` Cornelia Huck
2019-09-20 13:41           ` Peter Maydell
2019-09-20 15:34             ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190919124115.11510-18-cohuck@redhat.com \
    --to=cohuck@redhat.com \
    --cc=david@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).